metadata.yaml 171 KB


  1. plugin_name: proc.plugin
  2. modules:
  3. - meta:
  4. plugin_name: proc.plugin
  5. module_name: /proc/stat
  6. monitored_instance:
  7. name: System statistics
  8. link: ""
  9. categories:
  10. - data-collection.linux-systems.system-metrics
  11. icon_filename: "linuxserver.svg"
  12. related_resources:
  13. integrations:
  14. list: []
  15. info_provided_to_referring_integrations:
  16. description: ""
  17. keywords:
  18. - cpu utilization
  19. - process counts
  20. most_popular: false
  21. overview:
  22. data_collection:
  23. metrics_description: |
  24. CPU utilization, states, and frequencies, along with key Linux system performance metrics.
  25. The `/proc/stat` file provides various types of system statistics:
  26. - The overall system CPU usage statistics
  27. - Per CPU core statistics
  28. - The total context switching of the system
  29. - The total number of processes running
  30. - The total CPU interrupts
  31. - The total CPU softirqs
  32. The collector also reads:
  33. - `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
  34. - `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
  35. - `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
  36. - `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
  37. - `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
  38. - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
  39. - `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
  40. method_description: ""
  41. supported_platforms:
  42. include: ["linux"]
  43. exclude: []
  44. multi_instance: false
  45. additional_permissions:
  46. description: ""
  47. default_behavior:
  48. auto_detection:
  49. description: |
  50. The collector auto-detects all metrics. No configuration is needed.
  51. limits:
  52. description: ""
  53. performance_impact:
  54. description: |
  55. The collector disables CPU frequency and idle state monitoring when there are more than 128 CPU cores available.
  56. setup:
  57. prerequisites:
  58. list: []
  59. configuration:
  60. file:
  61. section_name: "plugin:proc:/proc/stat"
  62. name: "netdata.conf"
  63. description: ""
  64. options:
  65. description: ""
  66. folding:
  67. title: ""
  68. enabled: true
  69. list: []
  70. examples:
  71. folding:
  72. enabled: true
  73. title: ""
  74. list: []
  75. troubleshooting:
  76. problems:
  77. list: []
  78. alerts:
  79. - name: 10min_cpu_usage
  80. link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
  81. metric: system.cpu
  82. info: average CPU utilization over the last 10 minutes (excluding iowait, nice and steal)
  83. os: "linux"
  84. - name: 10min_cpu_iowait
  85. link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
  86. metric: system.cpu
  87. info: average CPU iowait time over the last 10 minutes
  88. os: "linux"
  89. - name: 20min_steal_cpu
  90. link: https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf
  91. metric: system.cpu
  92. info: average CPU steal time over the last 20 minutes
  93. os: "linux"
  94. metrics:
  95. folding:
  96. title: Metrics
  97. enabled: false
  98. description: ""
  99. availability: []
  100. scopes:
  101. - name: global
  102. description: ""
  103. labels: []
  104. metrics:
  105. - name: system.cpu
  106. description: Total CPU utilization
  107. unit: "percentage"
  108. chart_type: stacked
  109. dimensions:
  110. - name: guest_nice
  111. - name: guest
  112. - name: steal
  113. - name: softirq
  114. - name: irq
  115. - name: user
  116. - name: system
  117. - name: nice
  118. - name: iowait
  119. - name: idle
  120. - name: system.intr
  121. description: CPU Interrupts
  122. unit: "interrupts/s"
  123. chart_type: line
  124. dimensions:
  125. - name: interrupts
  126. - name: system.ctxt
  127. description: CPU Context Switches
  128. unit: "context switches/s"
  129. chart_type: line
  130. dimensions:
  131. - name: switches
  132. - name: system.forks
  133. description: Started Processes
  134. unit: "processes/s"
  135. chart_type: line
  136. dimensions:
  137. - name: started
  138. - name: system.processes
  139. description: System Processes
  140. unit: "processes"
  141. chart_type: line
  142. dimensions:
  143. - name: running
  144. - name: blocked
  145. - name: cpu.core_throttling
  146. description: Core Thermal Throttling Events
  147. unit: "events/s"
  148. chart_type: line
  149. dimensions:
  150. - name: a dimension per cpu core
  151. - name: cpu.package_throttling
  152. description: Package Thermal Throttling Events
  153. unit: "events/s"
  154. chart_type: line
  155. dimensions:
  156. - name: a dimension per package
  157. - name: cpu.cpufreq
  158. description: Current CPU Frequency
  159. unit: "MHz"
  160. chart_type: line
  161. dimensions:
  162. - name: a dimension per cpu core
  163. - name: cpu core
  164. description: ""
  165. labels:
  166. - name: cpu
  167. description: TBD
  168. metrics:
  169. - name: cpu.cpu
  170. description: Core utilization
  171. unit: "percentage"
  172. chart_type: stacked
  173. dimensions:
  174. - name: guest_nice
  175. - name: guest
  176. - name: steal
  177. - name: softirq
  178. - name: irq
  179. - name: user
  180. - name: system
  181. - name: nice
  182. - name: iowait
  183. - name: idle
  184. - name: cpuidle.cpu_cstate_residency_time
  185. description: C-state residency time
  186. unit: "percentage"
  187. chart_type: stacked
  188. dimensions:
  189. - name: a dimension per c-state
  190. - meta:
  191. plugin_name: proc.plugin
  192. module_name: /proc/sys/kernel/random/entropy_avail
  193. monitored_instance:
  194. name: Entropy
  195. link: ""
  196. categories:
  197. - data-collection.linux-systems.system-metrics
  198. icon_filename: "syslog.png"
  199. related_resources:
  200. integrations:
  201. list: []
  202. info_provided_to_referring_integrations:
  203. description: ""
  204. keywords:
  205. - entropy
  206. most_popular: false
  207. overview:
  208. data_collection:
  209. metrics_description: |
  210. Entropy, a measure of the randomness or unpredictability of data.
  211. In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
  212. secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
  213. vulnerable to attacks that exploit the predictability of the generated keys.
  214. In most operating systems, entropy is generated by collecting random events from various sources, such as
  215. hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
  216. of entropy, which is then used to generate random numbers when needed.
  217. The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
  218. to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
  219. which blocks until enough entropy is available to generate the requested numbers. This ensures that the
  220. generated numbers are truly random and not predictable.
  221. However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
  222. programs that rely on random numbers to slow down or even freeze. This is especially problematic for
  223. cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
  224. To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
  225. entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
  226. radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
  227. software-based sources.
  228. One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
  229. for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
  230. high-quality entropy, which can be used to seed the pool of entropy in the operating system.
  231. Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
  232. exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
  233. can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
  234. method_description: ""
  235. supported_platforms:
  236. include: ["linux"]
  237. exclude: []
  238. multi_instance: false
  239. additional_permissions:
  240. description: ""
  241. default_behavior:
  242. auto_detection:
  243. description: ""
  244. limits:
  245. description: ""
  246. performance_impact:
  247. description: ""
  248. setup:
  249. prerequisites:
  250. list: []
  251. configuration:
  252. file:
  253. name: ""
  254. description: ""
  255. options:
  256. description: ""
  257. folding:
  258. title: ""
  259. enabled: true
  260. list: []
  261. examples:
  262. folding:
  263. enabled: true
  264. title: ""
  265. list: []
  266. troubleshooting:
  267. problems:
  268. list: []
  269. alerts:
  270. - name: lowest_entropy
  271. link: https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf
  272. metric: system.entropy
  273. info: minimum number of bits of entropy available for the kernel’s random number generator
  274. metrics:
  275. folding:
  276. title: Metrics
  277. enabled: false
  278. description: ""
  279. availability: []
  280. scopes:
  281. - name: global
  282. description: ""
  283. labels: []
  284. metrics:
  285. - name: system.entropy
  286. description: Available Entropy
  287. unit: "entropy"
  288. chart_type: line
  289. dimensions:
  290. - name: entropy
  291. - meta:
  292. plugin_name: proc.plugin
  293. module_name: /proc/uptime
  294. monitored_instance:
  295. name: System Uptime
  296. link: ""
  297. categories:
  298. - data-collection.linux-systems.system-metrics
  299. icon_filename: "linuxserver.svg"
  300. related_resources:
  301. integrations:
  302. list: []
  303. info_provided_to_referring_integrations:
  304. description: ""
  305. keywords:
  306. - uptime
  307. most_popular: false
  308. overview:
  309. data_collection:
  310. metrics_description: |
  311. The amount of time the system has been up (running).
  312. Uptime is a critical aspect of overall system performance:
  313. - **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
  314. - **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
  315. - **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
  316. - **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
  317. - **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
  318. - **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
  319. - **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
  320. method_description: ""
  321. supported_platforms:
  322. include: ["linux"]
  323. exclude: []
  324. multi_instance: false
  325. additional_permissions:
  326. description: ""
  327. default_behavior:
  328. auto_detection:
  329. description: ""
  330. limits:
  331. description: ""
  332. performance_impact:
  333. description: ""
  334. setup:
  335. prerequisites:
  336. list: []
  337. configuration:
  338. file:
  339. name: ""
  340. description: ""
  341. options:
  342. description: ""
  343. folding:
  344. title: ""
  345. enabled: true
  346. list: []
  347. examples:
  348. folding:
  349. enabled: true
  350. title: ""
  351. list: []
  352. troubleshooting:
  353. problems:
  354. list: []
  355. alerts: []
  356. metrics:
  357. folding:
  358. title: Metrics
  359. enabled: false
  360. description: ""
  361. availability: []
  362. scopes:
  363. - name: global
  364. description: ""
  365. labels: []
  366. metrics:
  367. - name: system.uptime
  368. description: System Uptime
  369. unit: "seconds"
  370. chart_type: line
  371. dimensions:
  372. - name: uptime
  373. - meta:
  374. plugin_name: proc.plugin
  375. module_name: /proc/vmstat
  376. monitored_instance:
  377. name: Memory Statistics
  378. link: ""
  379. categories:
  380. - data-collection.linux-systems.memory-metrics
  381. icon_filename: "linuxserver.svg"
  382. related_resources:
  383. integrations:
  384. list: []
  385. info_provided_to_referring_integrations:
  386. description: ""
  387. keywords:
  388. - swap
  389. - page faults
  390. - oom
  391. - numa
  392. most_popular: false
  393. overview:
  394. data_collection:
  395. metrics_description: |
  396. Linux Virtual memory subsystem.
  397. Information about memory management, indicating how effectively the kernel allocates and frees
  398. memory resources in response to system demands.
  399. Monitors page faults, which occur when a process requests a portion of its memory that isn't
  400. immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
  401. provide insights into application behavior.
  402. Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
  403. swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
  404. a compressed cache for swap pages, and provides insights into its usage and performance implications.
  405. In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
  406. memory resources between host and guest systems.
  407. For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
  408. can impact the performance based on the memory access times.
  409. The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
  410. of memory resources.
  411. method_description: ""
  412. supported_platforms:
  413. include: ["linux"]
  414. exclude: []
  415. multi_instance: false
  416. additional_permissions:
  417. description: ""
  418. default_behavior:
  419. auto_detection:
  420. description: ""
  421. limits:
  422. description: ""
  423. performance_impact:
  424. description: ""
  425. setup:
  426. prerequisites:
  427. list: []
  428. configuration:
  429. file:
  430. name: ""
  431. description: ""
  432. options:
  433. description: ""
  434. folding:
  435. title: ""
  436. enabled: true
  437. list: []
  438. examples:
  439. folding:
  440. enabled: true
  441. title: ""
  442. list: []
  443. troubleshooting:
  444. problems:
  445. list: []
  446. alerts:
  447. - name: 30min_ram_swapped_out
  448. link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
  449. metric: mem.swapio
  450. info: percentage of the system RAM swapped in the last 30 minutes
  451. os: "linux freebsd"
  452. - name: oom_kill
  453. link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
  454. metric: mem.oom_kill
  455. info: number of out of memory kills in the last 30 minutes
  456. os: "linux"
  457. metrics:
  458. folding:
  459. title: Metrics
  460. enabled: false
  461. description: ""
  462. availability: []
  463. scopes:
  464. - name: global
  465. description: ""
  466. labels: []
  467. metrics:
  468. - name: mem.swapio
  469. description: Swap I/O
  470. unit: "KiB/s"
  471. chart_type: area
  472. dimensions:
  473. - name: in
  474. - name: out
  475. - name: system.pgpgio
  476. description: Memory Paged from/to disk
  477. unit: "KiB/s"
  478. chart_type: area
  479. dimensions:
  480. - name: in
  481. - name: out
  482. - name: system.pgfaults
  483. description: Memory Page Faults
  484. unit: "faults/s"
  485. chart_type: line
  486. dimensions:
  487. - name: minor
  488. - name: major
  489. - name: mem.balloon
  490. description: Memory Ballooning Operations
  491. unit: "KiB/s"
  492. chart_type: line
  493. dimensions:
  494. - name: inflate
  495. - name: deflate
  496. - name: migrate
  497. - name: mem.zswapio
  498. description: ZSwap I/O
  499. unit: "KiB/s"
  500. chart_type: area
  501. dimensions:
  502. - name: in
  503. - name: out
  504. - name: mem.ksm_cow
  505. description: KSM Copy On Write Operations
  506. unit: "KiB/s"
  507. chart_type: line
  508. dimensions:
  509. - name: swapin
  510. - name: write
  511. - name: mem.thp_faults
  512. description: Transparent Huge Page Fault Allocations
  513. unit: "events/s"
  514. chart_type: line
  515. dimensions:
  516. - name: alloc
  517. - name: fallback
  518. - name: fallback_charge
  519. - name: mem.thp_file
  520. description: Transparent Huge Page File Allocations
  521. unit: "events/s"
  522. chart_type: line
  523. dimensions:
  524. - name: alloc
  525. - name: fallback
  526. - name: mapped
  527. - name: fallback_charge
  528. - name: mem.thp_zero
  529. description: Transparent Huge Zero Page Allocations
  530. unit: "events/s"
  531. chart_type: line
  532. dimensions:
  533. - name: alloc
  534. - name: failed
  535. - name: mem.thp_collapse
  536. description: Transparent Huge Pages Collapsed by khugepaged
  537. unit: "events/s"
  538. chart_type: line
  539. dimensions:
  540. - name: alloc
  541. - name: failed
  542. - name: mem.thp_split
  543. description: Transparent Huge Page Splits
  544. unit: "events/s"
  545. chart_type: line
  546. dimensions:
  547. - name: split
  548. - name: failed
  549. - name: split_pmd
  550. - name: split_deferred
  551. - name: mem.thp_swapout
  552. description: Transparent Huge Pages Swap Out
  553. unit: "events/s"
  554. chart_type: line
  555. dimensions:
  556. - name: swapout
  557. - name: fallback
  558. - name: mem.thp_compact
  559. description: Transparent Huge Pages Compaction
  560. unit: "events/s"
  561. chart_type: line
  562. dimensions:
  563. - name: success
  564. - name: fail
  565. - name: stall
  566. - name: mem.oom_kill
  567. description: Out of Memory Kills
  568. unit: "kills/s"
  569. chart_type: line
  570. dimensions:
  571. - name: kills
  572. - name: mem.numa
  573. description: NUMA events
  574. unit: "events/s"
  575. chart_type: line
  576. dimensions:
  577. - name: local
  578. - name: foreign
  579. - name: interleave
  580. - name: other
  581. - name: pte_updates
  582. - name: huge_pte_updates
  583. - name: hint_faults
  584. - name: hint_faults_local
  585. - name: pages_migrated
  586. - meta:
  587. plugin_name: proc.plugin
  588. module_name: /proc/interrupts
  589. monitored_instance:
  590. name: Interrupts
  591. link: ""
  592. categories:
  593. - data-collection.linux-systems.cpu-metrics
  594. icon_filename: "linuxserver.svg"
  595. related_resources:
  596. integrations:
  597. list: []
  598. info_provided_to_referring_integrations:
  599. description: ""
  600. keywords:
  601. - interrupts
  602. most_popular: false
  603. overview:
  604. data_collection:
  605. metrics_description: |
  606. Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
  607. The numbers reported are the counts of the interrupts that have occurred of each type.
  608. An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
  609. immediate attention. The processor then interrupts its current activities and executes the interrupt handler
  610. to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
  611. The types of interrupts include:
  612. - **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
  613. you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
  614. - **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. They are primarily
  615. used to switch the CPU among different tasks.
  616. - **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
  617. - **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
  618. Monitoring `/proc/interrupts` can be used for:
  619. - **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
  620. configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
  621. performance degradation.
  622. - **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
  623. - **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
  624. understand what your system is doing. It can provide insights into the system's interaction with hardware,
  625. drivers, and other parts of the kernel.
  626. method_description: ""
  627. supported_platforms:
  628. include: []
  629. exclude: []
  630. multi_instance: true
  631. additional_permissions:
  632. description: ""
  633. default_behavior:
  634. auto_detection:
  635. description: ""
  636. limits:
  637. description: ""
  638. performance_impact:
  639. description: ""
  640. setup:
  641. prerequisites:
  642. list: []
  643. configuration:
  644. file:
  645. name: ""
  646. description: ""
  647. options:
  648. description: ""
  649. folding:
  650. title: ""
  651. enabled: true
  652. list: []
  653. examples:
  654. folding:
  655. enabled: true
  656. title: ""
  657. list: []
  658. troubleshooting:
  659. problems:
  660. list: []
  661. alerts: []
  662. metrics:
  663. folding:
  664. title: Metrics
  665. enabled: false
  666. description: ""
  667. availability: []
  668. scopes:
  669. - name: global
  670. description: ""
  671. labels: []
  672. metrics:
  673. - name: system.interrupts
  674. description: System interrupts
  675. unit: "interrupts/s"
  676. chart_type: stacked
  677. dimensions:
  678. - name: a dimension per device
  679. - name: cpu core
  680. description: ""
  681. labels:
  682. - name: cpu
  683. description: TBD
  684. metrics:
  685. - name: cpu.interrupts
  686. description: CPU interrupts
  687. unit: "interrupts/s"
  688. chart_type: stacked
  689. dimensions:
  690. - name: a dimension per device
  691. - meta:
  692. plugin_name: proc.plugin
  693. module_name: /proc/loadavg
  694. monitored_instance:
  695. name: System Load Average
  696. link: ""
  697. categories:
  698. - data-collection.linux-systems.system-metrics
  699. icon_filename: "linuxserver.svg"
  700. related_resources:
  701. integrations:
  702. list: []
  703. info_provided_to_referring_integrations:
  704. description: ""
  705. keywords:
  706. - load
  707. - load average
  708. most_popular: false
  709. overview:
  710. data_collection:
  711. metrics_description: |
  712. The `/proc/loadavg` file provides information about the system load average.
  713. The load average is a measure of the amount of computational work that a system performs. It is a
  714. representation of the average system load over a period of time.
  715. This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
  716. respectively. It also includes the number of currently running processes and the total number of processes.
  717. Monitoring the load average can be used for:
  718. - **System performance**: If the load average is too high, it may indicate that your system is overloaded.
  719. On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
  720. load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
  721. overloaded and tasks are waiting for CPU time.
  722. - **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
  723. due to a runaway process, a software bug, or a hardware issue.
  724. - **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
  725. system's workload. This can help with capacity planning and scaling decisions.
  726. Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
  727. Therefore, high load averages could be due to I/O contention as well as CPU contention.
  728. method_description: ""
  729. supported_platforms:
  730. include: []
  731. exclude: []
  732. multi_instance: false
  733. additional_permissions:
  734. description: ""
  735. default_behavior:
  736. auto_detection:
  737. description: ""
  738. limits:
  739. description: ""
  740. performance_impact:
  741. description: ""
  742. setup:
  743. prerequisites:
  744. list: []
  745. configuration:
  746. file:
  747. name: ""
  748. description: ""
  749. options:
  750. description: ""
  751. folding:
  752. title: ""
  753. enabled: true
  754. list: []
  755. examples:
  756. folding:
  757. enabled: true
  758. title: ""
  759. list: []
  760. troubleshooting:
  761. problems:
  762. list: []
  763. alerts:
  764. - name: load_cpu_number
  765. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  766. metric: system.load
  767. info: number of active CPU cores in the system
  768. os: "linux"
  769. - name: load_average_15
  770. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  771. metric: system.load
  772. info: system fifteen-minute load average
  773. os: "linux"
  774. - name: load_average_5
  775. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  776. metric: system.load
  777. info: system five-minute load average
  778. os: "linux"
  779. - name: load_average_1
  780. link: https://github.com/netdata/netdata/blob/master/health/health.d/load.conf
  781. metric: system.load
  782. info: system one-minute load average
  783. os: "linux"
  784. - name: active_processes
  785. link: https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf
  786. metric: system.active_processes
  787. info: system process IDs (PID) space utilization
  788. metrics:
  789. folding:
  790. title: Metrics
  791. enabled: false
  792. description: ""
  793. availability: []
  794. scopes:
  795. - name: global
  796. description: ""
  797. labels: []
  798. metrics:
  799. - name: system.load
  800. description: System Load Average
  801. unit: "load"
  802. chart_type: line
  803. dimensions:
  804. - name: load1
  805. - name: load5
  806. - name: load15
  807. - name: system.active_processes
  808. description: System Active Processes
  809. unit: "processes"
  810. chart_type: line
  811. dimensions:
  812. - name: active
  813. - meta:
  814. plugin_name: proc.plugin
  815. module_name: /proc/pressure
  816. monitored_instance:
  817. name: Pressure Stall Information
  818. link: ""
  819. categories:
  820. - data-collection.linux-systems.pressure-metrics
  821. icon_filename: "linuxserver.svg"
  822. related_resources:
  823. integrations:
  824. list: []
  825. info_provided_to_referring_integrations:
  826. description: ""
  827. keywords:
  828. - pressure
  829. most_popular: false
  830. overview:
  831. data_collection:
  832. metrics_description: |
  833. Introduced in Linux kernel 4.20, `/proc/pressure` exposes pressure stall information (PSI), a kernel
  834. feature that tracks the amount of time tasks are stalled due to contention for a resource such as
  835. CPU, memory, or I/O.
  836. The collector monitors 4 separate files for CPU, memory, I/O, and IRQ:
  837. - **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
  838. - **memory**: Tracks the amount of time tasks are stalled due to memory contention.
  839. - **io**: Tracks the amount of time tasks are stalled due to I/O contention.
  840. - **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
  841. Each of them provides metrics for stall time over the last 10 seconds, 1 minute, and 5 minutes.
  842. Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
  843. - **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
  844. frequently being stalled due to lack of resources, which can significantly degrade system performance.
  845. - **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
  846. help identify whether resource contention is the cause.
  847. - **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
  848. utilization and make informed decisions about when to add more resources to your system.
  849. method_description: ""
  850. supported_platforms:
  851. include: []
  852. exclude: []
  853. multi_instance: false
  854. additional_permissions:
  855. description: ""
  856. default_behavior:
  857. auto_detection:
  858. description: ""
  859. limits:
  860. description: ""
  861. performance_impact:
  862. description: ""
  863. setup:
  864. prerequisites:
  865. list: []
  866. configuration:
  867. file:
  868. name: ""
  869. description: ""
  870. options:
  871. description: ""
  872. folding:
  873. title: ""
  874. enabled: true
  875. list: []
  876. examples:
  877. folding:
  878. enabled: true
  879. title: ""
  880. list: []
  881. troubleshooting:
  882. problems:
  883. list: []
  884. alerts: []
  885. metrics:
  886. folding:
  887. title: Metrics
  888. enabled: false
  889. description: ""
  890. availability: []
  891. scopes:
  892. - name: global
  893. description: ""
  894. labels: []
  895. metrics:
  896. - name: system.cpu_some_pressure
  897. description: CPU some pressure
  898. unit: "percentage"
  899. chart_type: line
  900. dimensions:
  901. - name: some10
  902. - name: some60
  903. - name: some300
  904. - name: system.cpu_some_pressure_stall_time
  905. description: CPU some pressure stall time
  906. unit: "ms"
  907. chart_type: line
  908. dimensions:
  909. - name: time
  910. - name: system.cpu_full_pressure
  911. description: CPU full pressure
  912. unit: "percentage"
  913. chart_type: line
  914. dimensions:
  915. - name: some10
  916. - name: some60
  917. - name: some300
  918. - name: system.cpu_full_pressure_stall_time
  919. description: CPU full pressure stall time
  920. unit: "ms"
  921. chart_type: line
  922. dimensions:
  923. - name: time
  924. - name: system.memory_some_pressure
  925. description: Memory some pressure
  926. unit: "percentage"
  927. chart_type: line
  928. dimensions:
  929. - name: some10
  930. - name: some60
  931. - name: some300
  932. - name: system.memory_some_pressure_stall_time
  933. description: Memory some pressure stall time
  934. unit: "ms"
  935. chart_type: line
  936. dimensions:
  937. - name: time
  938. - name: system.memory_full_pressure
  939. description: Memory full pressure
  940. unit: "percentage"
  941. chart_type: line
  942. dimensions:
  943. - name: some10
  944. - name: some60
  945. - name: some300
  946. - name: system.memory_full_pressure_stall_time
  947. description: Memory full pressure stall time
  948. unit: "ms"
  949. chart_type: line
  950. dimensions:
  951. - name: time
  952. - name: system.io_some_pressure
  953. description: I/O some pressure
  954. unit: "percentage"
  955. chart_type: line
  956. dimensions:
  957. - name: some10
  958. - name: some60
  959. - name: some300
  960. - name: system.io_some_pressure_stall_time
  961. description: I/O some pressure stall time
  962. unit: "ms"
  963. chart_type: line
  964. dimensions:
  965. - name: time
  966. - name: system.io_full_pressure
  967. description: I/O full pressure
  968. unit: "percentage"
  969. chart_type: line
  970. dimensions:
  971. - name: some10
  972. - name: some60
  973. - name: some300
  974. - name: system.io_full_pressure_stall_time
  975. description: I/O full pressure stall time
  976. unit: "ms"
  977. chart_type: line
  978. dimensions:
  979. - name: time
  980. - meta:
  981. plugin_name: proc.plugin
  982. module_name: /proc/softirqs
  983. monitored_instance:
  984. name: SoftIRQ statistics
  985. link: ""
  986. categories:
  987. - data-collection.linux-systems.cpu-metrics
  988. icon_filename: "linuxserver.svg"
  989. related_resources:
  990. integrations:
  991. list: []
  992. info_provided_to_referring_integrations:
  993. description: ""
  994. keywords:
  995. - softirqs
  996. - interrupts
  997. most_popular: false
  998. overview:
  999. data_collection:
  1000. metrics_description: |
  1001. In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
  1002. The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
  1003. Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
  1004. deferred and processed later in a context where it's safe to enable interrupts.
  1005. The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
  1006. decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
  1007. half for too long, which could lead to missed interrupts.
  1008. Monitoring `/proc/softirqs` is useful for:
  1009. - **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
  1010. rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
  1011. - **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
  1012. what is going on. For example, a sudden increase in block device softirqs (BLOCK) might indicate a problem
  1013. with a disk.
  1014. - **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
  1015. your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
  1016. interrupts.
  1017. method_description: ""
  1018. supported_platforms:
  1019. include: []
  1020. exclude: []
  1021. multi_instance: true
  1022. additional_permissions:
  1023. description: ""
  1024. default_behavior:
  1025. auto_detection:
  1026. description: ""
  1027. limits:
  1028. description: ""
  1029. performance_impact:
  1030. description: ""
  1031. setup:
  1032. prerequisites:
  1033. list: []
  1034. configuration:
  1035. file:
  1036. name: ""
  1037. description: ""
  1038. options:
  1039. description: ""
  1040. folding:
  1041. title: ""
  1042. enabled: true
  1043. list: []
  1044. examples:
  1045. folding:
  1046. enabled: true
  1047. title: ""
  1048. list: []
  1049. troubleshooting:
  1050. problems:
  1051. list: []
  1052. alerts: []
  1053. metrics:
  1054. folding:
  1055. title: Metrics
  1056. enabled: false
  1057. description: ""
  1058. availability: []
  1059. scopes:
  1060. - name: global
  1061. description: ""
  1062. labels: []
  1063. metrics:
  1064. - name: system.softirqs
  1065. description: System softirqs
  1066. unit: "softirqs/s"
  1067. chart_type: stacked
  1068. dimensions:
  1069. - name: a dimension per softirq
  1070. - name: cpu core
  1071. description: ""
  1072. labels:
  1073. - name: cpu
  1074. description: TBD
  1075. metrics:
  1076. - name: cpu.softirqs
  1077. description: CPU softirqs
  1078. unit: "softirqs/s"
  1079. chart_type: stacked
  1080. dimensions:
  1081. - name: a dimension per softirq
  1082. - meta:
  1083. plugin_name: proc.plugin
  1084. module_name: /proc/net/softnet_stat
  1085. monitored_instance:
  1086. name: Softnet Statistics
  1087. link: ""
  1088. categories:
  1089. - data-collection.linux-systems.network-metrics
  1090. icon_filename: "linuxserver.svg"
  1091. related_resources:
  1092. integrations:
  1093. list: []
  1094. info_provided_to_referring_integrations:
  1095. description: ""
  1096. keywords:
  1097. - softnet
  1098. most_popular: false
  1099. overview:
  1100. data_collection:
  1101. metrics_description: |
  1102. `/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
  1103. It provides information about:
  1104. - Total number of processed packets (`processed`).
  1105. - Packets dropped because the backlog queue was full (`dropped`) and times ksoftirq ran out of quota (`squeezed`).
  1106. - Times net_rx_action was rescheduled.
  1107. - Number of times processed all lists before quota.
  1108. - Number of times did not process all lists due to quota.
  1109. - Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
  1110. - Number of times GRO cells were processed.
  1111. Monitoring the /proc/net/softnet_stat file can be useful for:
  1112. - **Network performance monitoring**: By tracking the total number of processed packets and how many packets
  1113. were dropped, you can gain insights into your system's network performance.
  1114. - **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
  1115. For instance, a high number of dropped packets may indicate a network problem.
  1116. - **Capacity planning**: If your system is consistently processing near its maximum capacity of network
  1117. packets, it might be time to consider upgrading your network infrastructure.
  1118. method_description: ""
  1119. supported_platforms:
  1120. include: []
  1121. exclude: []
  1122. multi_instance: true
  1123. additional_permissions:
  1124. description: ""
  1125. default_behavior:
  1126. auto_detection:
  1127. description: ""
  1128. limits:
  1129. description: ""
  1130. performance_impact:
  1131. description: ""
  1132. setup:
  1133. prerequisites:
  1134. list: []
  1135. configuration:
  1136. file:
  1137. name: ""
  1138. description: ""
  1139. options:
  1140. description: ""
  1141. folding:
  1142. title: ""
  1143. enabled: true
  1144. list: []
  1145. examples:
  1146. folding:
  1147. enabled: true
  1148. title: ""
  1149. list: []
  1150. troubleshooting:
  1151. problems:
  1152. list: []
  1153. alerts:
  1154. - name: 1min_netdev_backlog_exceeded
  1155. link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
  1156. metric: system.softnet_stat
  1157. info: average number of dropped packets in the last minute due to exceeded net.core.netdev_max_backlog
  1158. os: "linux"
  1159. - name: 1min_netdev_budget_ran_outs
  1160. link: https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf
  1161. metric: system.softnet_stat
  1162. info:
  1163. average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last
  1164. minute (this can be a cause for dropped packets)
  1165. os: "linux"
  1166. metrics:
  1167. folding:
  1168. title: Metrics
  1169. enabled: false
  1170. description: ""
  1171. availability: []
  1172. scopes:
  1173. - name: global
  1174. description: ""
  1175. labels: []
  1176. metrics:
  1177. - name: system.softnet_stat
  1178. description: System softnet_stat
  1179. unit: "events/s"
  1180. chart_type: line
  1181. dimensions:
  1182. - name: processed
  1183. - name: dropped
  1184. - name: squeezed
  1185. - name: received_rps
  1186. - name: flow_limit_count
  1187. - name: cpu core
  1188. description: ""
  1189. labels: []
  1190. metrics:
  1191. - name: cpu.softnet_stat
  1192. description: CPU softnet_stat
  1193. unit: "events/s"
  1194. chart_type: line
  1195. dimensions:
  1196. - name: processed
  1197. - name: dropped
  1198. - name: squeezed
  1199. - name: received_rps
  1200. - name: flow_limit_count
  1201. - meta:
  1202. plugin_name: proc.plugin
  1203. module_name: /proc/meminfo
  1204. monitored_instance:
  1205. name: Memory Usage
  1206. link: ""
  1207. categories:
  1208. - data-collection.linux-systems.memory-metrics
  1209. icon_filename: "linuxserver.svg"
  1210. related_resources:
  1211. integrations:
  1212. list: []
  1213. info_provided_to_referring_integrations:
  1214. description: ""
  1215. keywords:
  1216. - memory
  1217. - ram
  1218. - available
  1219. - committed
  1220. most_popular: false
  1221. overview:
  1222. data_collection:
  1223. metrics_description: |
  1224. `/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
  1225. about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
  1226. SLAB memory, memory mappings, and more.
  1227. Monitoring /proc/meminfo can be useful for:
  1228. - **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
  1229. tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
  1230. from more RAM.
  1231. - **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
  1232. whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
  1233. mean that your system is swapping out a lot of memory to disk, which can degrade performance.
  1234. - **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
  1235. decisions about future capacity needs.
  1236. method_description: ""
  1237. supported_platforms:
  1238. include: []
  1239. exclude: []
  1240. multi_instance: false
  1241. additional_permissions:
  1242. description: ""
  1243. default_behavior:
  1244. auto_detection:
  1245. description: ""
  1246. limits:
  1247. description: ""
  1248. performance_impact:
  1249. description: ""
  1250. setup:
  1251. prerequisites:
  1252. list: []
  1253. configuration:
  1254. file:
  1255. name: ""
  1256. description: ""
  1257. options:
  1258. description: ""
  1259. folding:
  1260. title: ""
  1261. enabled: true
  1262. list: []
  1263. examples:
  1264. folding:
  1265. enabled: true
  1266. title: ""
  1267. list: []
  1268. troubleshooting:
  1269. problems:
  1270. list: []
  1271. alerts:
  1272. - name: ram_in_use
  1273. link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
  1274. metric: system.ram
  1275. info: system memory utilization
  1276. os: "linux"
  1277. - name: ram_available
  1278. link: https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf
  1279. metric: mem.available
  1280. info: percentage of estimated amount of RAM available for userspace processes, without causing swapping
  1281. os: "linux"
  1282. - name: used_swap
  1283. link: https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf
  1284. metric: mem.swap
  1285. info: swap memory utilization
  1286. os: "linux freebsd"
  1287. - name: 1hour_memory_hw_corrupted
  1288. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1289. metric: mem.hwcorrupt
  1290. info: amount of memory corrupted due to a hardware failure
  1291. os: "linux"
  1292. metrics:
  1293. folding:
  1294. title: Metrics
  1295. enabled: false
  1296. description: ""
  1297. availability: []
  1298. scopes:
  1299. - name: global
  1300. description: ""
  1301. labels: []
  1302. metrics:
  1303. - name: system.ram
  1304. description: System RAM
  1305. unit: "MiB"
  1306. chart_type: stacked
  1307. dimensions:
  1308. - name: free
  1309. - name: used
  1310. - name: cached
  1311. - name: buffers
  1312. - name: mem.available
  1313. description: Available RAM for applications
  1314. unit: "MiB"
  1315. chart_type: area
  1316. dimensions:
  1317. - name: avail
  1318. - name: mem.swap
  1319. description: System Swap
  1320. unit: "MiB"
  1321. chart_type: stacked
  1322. dimensions:
  1323. - name: free
  1324. - name: used
  1325. - name: mem.swap_cached
  1326. description: Swap Memory Cached in RAM
  1327. unit: "MiB"
  1328. chart_type: stacked
  1329. dimensions:
  1330. - name: cached
  1331. - name: mem.zswap
  1332. description: Zswap Usage
  1333. unit: "MiB"
  1334. chart_type: stacked
  1335. dimensions:
  1336. - name: in-ram
  1337. - name: on-disk
  1338. - name: mem.hwcorrupt
  1339. description: Corrupted Memory detected by ECC
  1340. unit: "MiB"
  1341. chart_type: line
  1342. dimensions:
  1343. - name: HardwareCorrupted
  1344. - name: mem.committed
  1345. description: Committed (Allocated) Memory
  1346. unit: "MiB"
  1347. chart_type: area
  1348. dimensions:
  1349. - name: Committed_AS
  1350. - name: mem.writeback
  1351. description: Writeback Memory
  1352. unit: "MiB"
  1353. chart_type: line
  1354. dimensions:
  1355. - name: Dirty
  1356. - name: Writeback
  1357. - name: FuseWriteback
  1358. - name: NfsWriteback
  1359. - name: Bounce
  1360. - name: mem.kernel
  1361. description: Memory Used by Kernel
  1362. unit: "MiB"
  1363. chart_type: stacked
  1364. dimensions:
  1365. - name: Slab
  1366. - name: KernelStack
  1367. - name: PageTables
  1368. - name: VmallocUsed
  1369. - name: Percpu
  1370. - name: mem.slab
  1371. description: Reclaimable Kernel Memory
  1372. unit: "MiB"
  1373. chart_type: stacked
  1374. dimensions:
  1375. - name: reclaimable
  1376. - name: unreclaimable
  1377. - name: mem.hugepages
  1378. description: Dedicated HugePages Memory
  1379. unit: "MiB"
  1380. chart_type: stacked
  1381. dimensions:
  1382. - name: free
  1383. - name: used
  1384. - name: surplus
  1385. - name: reserved
  1386. - name: mem.thp
  1387. description: Transparent HugePages Memory
  1388. unit: "MiB"
  1389. chart_type: stacked
  1390. dimensions:
  1391. - name: anonymous
  1392. - name: shmem
  1393. - name: mem.thp_details
  1394. description: Details of Transparent HugePages Usage
  1395. unit: "MiB"
  1396. chart_type: line
  1397. dimensions:
  1398. - name: ShmemPmdMapped
  1399. - name: FileHugePages
  1400. - name: FilePmdMapped
  1401. - name: mem.reclaiming
  1402. description: Memory Reclaiming
  1403. unit: "MiB"
  1404. chart_type: line
  1405. dimensions:
  1406. - name: Active
  1407. - name: Inactive
  1408. - name: Active(anon)
  1409. - name: Inactive(anon)
  1410. - name: Active(file)
  1411. - name: Inactive(file)
  1412. - name: Unevictable
  1413. - name: Mlocked
  1414. - name: mem.high_low
  1415. description: High and Low Used and Free Memory Areas
  1416. unit: "MiB"
  1417. chart_type: stacked
  1418. dimensions:
  1419. - name: high_used
  1420. - name: low_used
  1421. - name: high_free
  1422. - name: low_free
  1423. - name: mem.cma
  1424. description: Contiguous Memory Allocator (CMA) Memory
  1425. unit: "MiB"
  1426. chart_type: stacked
  1427. dimensions:
  1428. - name: used
  1429. - name: free
  1430. - name: mem.directmaps
  1431. description: Direct Memory Mappings
  1432. unit: "MiB"
  1433. chart_type: stacked
  1434. dimensions:
  1435. - name: 4k
  1436. - name: 2m
  1437. - name: 4m
  1438. - name: 1g
  1439. - meta:
  1440. plugin_name: proc.plugin
  1441. module_name: /proc/pagetypeinfo
  1442. monitored_instance:
  1443. name: Page types
  1444. link: ""
  1445. categories:
  1446. - data-collection.linux-systems.memory-metrics
  1447. icon_filename: "microchip.svg"
  1448. related_resources:
  1449. integrations:
  1450. list: []
  1451. info_provided_to_referring_integrations:
  1452. description: ""
  1453. keywords:
  1454. - memory page types
  1455. most_popular: false
  1456. overview:
  1457. data_collection:
  1458. metrics_description: "This integration provides metrics about the system's memory page types"
  1459. method_description: ""
  1460. supported_platforms:
  1461. include: []
  1462. exclude: []
  1463. multi_instance: false
  1464. additional_permissions:
  1465. description: ""
  1466. default_behavior:
  1467. auto_detection:
  1468. description: ""
  1469. limits:
  1470. description: ""
  1471. performance_impact:
  1472. description: ""
  1473. setup:
  1474. prerequisites:
  1475. list: []
  1476. configuration:
  1477. file:
  1478. name: ""
  1479. description: ""
  1480. options:
  1481. description: ""
  1482. folding:
  1483. title: ""
  1484. enabled: true
  1485. list: []
  1486. examples:
  1487. folding:
  1488. enabled: true
  1489. title: ""
  1490. list: []
  1491. troubleshooting:
  1492. problems:
  1493. list: []
  1494. alerts: []
  1495. metrics:
  1496. folding:
  1497. title: Metrics
  1498. enabled: false
  1499. description: ""
  1500. availability: []
  1501. scopes:
  1502. - name: global
  1503. description: ""
  1504. labels: []
  1505. metrics:
  1506. - name: mem.pagetype_global
  1507. description: System orders available
  1508. unit: "B"
  1509. chart_type: stacked
  1510. dimensions:
  1511. - name: a dimension per pagesize
  1512. - name: node, zone, type
  1513. description: ""
  1514. labels:
  1515. - name: node_id
  1516. description: TBD
  1517. - name: node_zone
  1518. description: TBD
  1519. - name: node_type
  1520. description: TBD
  1521. metrics:
  1522. - name: mem.pagetype
  1523. description: pagetype_Node{node}_{zone}_{type}
  1524. unit: "B"
  1525. chart_type: stacked
  1526. dimensions:
  1527. - name: a dimension per pagesize
  1528. - meta:
  1529. plugin_name: proc.plugin
  1530. module_name: /sys/devices/system/edac/mc
  1531. monitored_instance:
  1532. name: Memory modules (DIMMs)
  1533. link: ""
  1534. categories:
  1535. - data-collection.linux-systems.memory-metrics
  1536. icon_filename: "microchip.svg"
  1537. related_resources:
  1538. integrations:
  1539. list: []
  1540. info_provided_to_referring_integrations:
  1541. description: ""
  1542. keywords:
  1543. - edac
  1544. - ecc
  1545. - dimm
  1546. - ram
  1547. - hardware
  1548. most_popular: false
  1549. overview:
  1550. data_collection:
  1551. metrics_description: |
  1552. The Error Detection and Correction (EDAC) subsystem detects and reports errors in the system's memory,
  1553. primarily ECC (Error-Correcting Code) memory errors.
  1554. The collector provides data for:
  1555. - Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
  1556. - errors related to a DIMM
  1557. - errors that cannot be associated with a DIMM
  1558. - Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
  1559. - memory controllers that can identify the physical DIMMs and report errors directly for them,
  1560. - memory controllers that report errors for memory address ranges that can be linked to DIMMs.
  1561. In this case the DIMMs reported may be more than the physical DIMMs installed.
  1562. method_description: ""
  1563. supported_platforms:
  1564. include: []
  1565. exclude: []
  1566. multi_instance: true
  1567. additional_permissions:
  1568. description: ""
  1569. default_behavior:
  1570. auto_detection:
  1571. description: ""
  1572. limits:
  1573. description: ""
  1574. performance_impact:
  1575. description: ""
  1576. setup:
  1577. prerequisites:
  1578. list: []
  1579. configuration:
  1580. file:
  1581. name: ""
  1582. description: ""
  1583. options:
  1584. description: ""
  1585. folding:
  1586. title: ""
  1587. enabled: true
  1588. list: []
  1589. examples:
  1590. folding:
  1591. enabled: true
  1592. title: ""
  1593. list: []
  1594. troubleshooting:
  1595. problems:
  1596. list: []
  1597. alerts:
  1598. - name: ecc_memory_mc_noinfo_correctable
  1599. metric: mem.edac_mc
  1600. info: memory controller ${label:controller} ECC correctable errors (unknown DIMM slot) in the last 10 minutes
  1601. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1602. - name: ecc_memory_mc_noinfo_uncorrectable
  1603. metric: mem.edac_mc
  1604. info: memory controller ${label:controller} ECC uncorrectable errors (unknown DIMM slot) in the last 10 minutes
  1605. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1606. - name: ecc_memory_dimm_correctable
  1607. metric: mem.edac_mc_dimm
  1608. info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes
  1609. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1610. - name: ecc_memory_dimm_uncorrectable
  1611. metric: mem.edac_mc_dimm
  1612. info: DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes
  1613. link: https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf
  1614. metrics:
  1615. folding:
  1616. title: Metrics
  1617. enabled: false
  1618. description: ""
  1619. availability: []
  1620. scopes:
  1621. - name: memory controller
  1622. description: These metrics refer to the memory controller.
  1623. labels:
  1624. - name: controller
  1625. description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller."
  1626. - name: mc_name
  1627. description: Memory controller type.
  1628. - name: size_mb
  1629. description: The amount of memory in megabytes that this memory controller manages.
  1630. - name: max_location
  1631. description: Last available memory slot in this memory controller.
  1632. metrics:
  1633. - name: mem.edac_mc
  1634. description: Memory Controller (MC) Error Detection And Correction (EDAC) Errors
  1635. unit: errors/s
  1636. chart_type: line
  1637. dimensions:
  1638. - name: correctable
  1639. - name: uncorrectable
  1640. - name: correctable_noinfo
  1641. - name: uncorrectable_noinfo
  1642. - name: memory module
  1643. description: These metrics refer to the memory module (or rank, [depends on the memory controller](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5)).
  1644. labels:
  1645. - name: controller
  1646. description: "[mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller."
  1647. - name: dimm
  1648. description: "[dimmX or rankX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#dimmx-or-rankx-directories) directory name of this memory module."
  1649. - name: dimm_dev_type
  1650. description: Type of DRAM device used in this memory module. For example, x1, x2, x4, x8.
  1651. - name: dimm_edac_mode
  1652. description: Used type of error detection and correction. For example, S4ECD4ED would mean a Chipkill with x4 DRAM.
  1653. - name: dimm_label
  1654. description: Label assigned to this memory module.
  1655. - name: dimm_location
  1656. description: Location of the memory module.
  1657. - name: dimm_mem_type
  1658. description: Type of the memory module.
  1659. - name: size
  1660. description: The amount of memory in megabytes that this memory module manages.
  1661. metrics:
  1662. - name: mem.edac_mc_dimm
  1663. description: DIMM Error Detection And Correction (EDAC) Errors
  1664. unit: errors/s
  1665. chart_type: line
  1666. dimensions:
  1667. - name: correctable
  1668. - name: uncorrectable
  1669. - meta:
  1670. plugin_name: proc.plugin
  1671. module_name: /sys/devices/system/node
  1672. monitored_instance:
  1673. name: Non-Uniform Memory Access
  1674. link: ""
  1675. categories:
  1676. - data-collection.linux-systems.memory-metrics
  1677. icon_filename: "linuxserver.svg"
  1678. related_resources:
  1679. integrations:
  1680. list: []
  1681. info_provided_to_referring_integrations:
  1682. description: ""
  1683. keywords:
  1684. - numa
  1685. most_popular: false
  1686. overview:
  1687. data_collection:
  1688. metrics_description: |
  1689. Information about NUMA (Non-Uniform Memory Access) nodes on the system.
  1690. NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can
  1691. share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
  1692. symmetric multiprocessing (SMP) system.
  1693. In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
  1694. Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
  1695. memory in any of the nodes, it does so faster when accessing memory within its own node.
  1696. The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
  1697. efficiency of memory allocations in multi-node systems.
  1698. method_description: ""
  1699. supported_platforms:
  1700. include: []
  1701. exclude: []
  1702. multi_instance: true
  1703. additional_permissions:
  1704. description: ""
  1705. default_behavior:
  1706. auto_detection:
  1707. description: ""
  1708. limits:
  1709. description: ""
  1710. performance_impact:
  1711. description: ""
  1712. setup:
  1713. prerequisites:
  1714. list: []
  1715. configuration:
  1716. file:
  1717. name: ""
  1718. description: ""
  1719. options:
  1720. description: ""
  1721. folding:
  1722. title: ""
  1723. enabled: true
  1724. list: []
  1725. examples:
  1726. folding:
  1727. enabled: true
  1728. title: ""
  1729. list: []
  1730. troubleshooting:
  1731. problems:
  1732. list: []
  1733. alerts: []
  1734. metrics:
  1735. folding:
  1736. title: Metrics
  1737. enabled: false
  1738. description: ""
  1739. availability: []
  1740. scopes:
  1741. - name: numa node
  1742. description: ""
  1743. labels:
  1744. - name: numa_node
  1745. description: TBD
  1746. metrics:
  1747. - name: mem.numa_nodes
  1748. description: NUMA events
  1749. unit: "events/s"
  1750. chart_type: line
  1751. dimensions:
  1752. - name: hit
  1753. - name: miss
  1754. - name: local
  1755. - name: foreign
  1756. - name: interleave
  1757. - name: other
  1758. - meta:
  1759. plugin_name: proc.plugin
  1760. module_name: /sys/kernel/mm/ksm
  1761. monitored_instance:
  1762. name: Kernel Same-Page Merging
  1763. link: ""
  1764. categories:
  1765. - data-collection.linux-systems.memory-metrics
  1766. icon_filename: "microchip.svg"
  1767. related_resources:
  1768. integrations:
  1769. list: []
  1770. info_provided_to_referring_integrations:
  1771. description: ""
  1772. keywords:
  1773. - ksm
  1774. - samepage
  1775. - merging
  1776. most_popular: false
  1777. overview:
  1778. data_collection:
  1779. metrics_description: |
  1780. Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
  1781. memory of different processes and identify identical pages. It then merges these identical pages into a
  1782. single page that the processes share. This is particularly useful for virtualization, where multiple virtual
  1783. machines might be running the same operating system or applications and have many identical pages.
  1784. The collector provides information about the operation and effectiveness of KSM on your system.
  1785. method_description: ""
  1786. supported_platforms:
  1787. include: []
  1788. exclude: []
  1789. multi_instance: false
  1790. additional_permissions:
  1791. description: ""
  1792. default_behavior:
  1793. auto_detection:
  1794. description: ""
  1795. limits:
  1796. description: ""
  1797. performance_impact:
  1798. description: ""
  1799. setup:
  1800. prerequisites:
  1801. list: []
  1802. configuration:
  1803. file:
  1804. name: ""
  1805. description: ""
  1806. options:
  1807. description: ""
  1808. folding:
  1809. title: ""
  1810. enabled: true
  1811. list: []
  1812. examples:
  1813. folding:
  1814. enabled: true
  1815. title: ""
  1816. list: []
  1817. troubleshooting:
  1818. problems:
  1819. list: []
  1820. alerts: []
  1821. metrics:
  1822. folding:
  1823. title: Metrics
  1824. enabled: false
  1825. description: ""
  1826. availability: []
  1827. scopes:
  1828. - name: global
  1829. description: ""
  1830. labels: []
  1831. metrics:
  1832. - name: mem.ksm
  1833. description: Kernel Same Page Merging
  1834. unit: "MiB"
  1835. chart_type: stacked
  1836. dimensions:
  1837. - name: shared
  1838. - name: unshared
  1839. - name: sharing
  1840. - name: volatile
  1841. - name: mem.ksm_savings
  1842. description: Kernel Same Page Merging Savings
  1843. unit: "MiB"
  1844. chart_type: area
  1845. dimensions:
  1846. - name: savings
  1847. - name: offered
  1848. - name: mem.ksm_ratios
  1849. description: Kernel Same Page Merging Effectiveness
  1850. unit: "percentage"
  1851. chart_type: line
  1852. dimensions:
  1853. - name: savings
  1854. - meta:
  1855. plugin_name: proc.plugin
  1856. module_name: /sys/block/zram
  1857. monitored_instance:
  1858. name: ZRAM
  1859. link: ""
  1860. categories:
  1861. - data-collection.linux-systems.memory-metrics
  1862. icon_filename: "microchip.svg"
  1863. related_resources:
  1864. integrations:
  1865. list: []
  1866. info_provided_to_referring_integrations:
  1867. description: ""
  1868. keywords:
  1869. - zram
  1870. most_popular: false
  1871. overview:
  1872. data_collection:
  1873. metrics_description: |
  1874. zRAM, or compressed RAM, is a block device that uses a portion of your system's RAM as its backing storage.
  1875. The data written to this block device is compressed and stored in memory.
  1876. The collector provides information about the operation and the effectiveness of zRAM on your system.
  1877. method_description: ""
  1878. supported_platforms:
  1879. include: []
  1880. exclude: []
  1881. multi_instance: true
  1882. additional_permissions:
  1883. description: ""
  1884. default_behavior:
  1885. auto_detection:
  1886. description: ""
  1887. limits:
  1888. description: ""
  1889. performance_impact:
  1890. description: ""
  1891. setup:
  1892. prerequisites:
  1893. list: []
  1894. configuration:
  1895. file:
  1896. name: ""
  1897. description: ""
  1898. options:
  1899. description: ""
  1900. folding:
  1901. title: ""
  1902. enabled: true
  1903. list: []
  1904. examples:
  1905. folding:
  1906. enabled: true
  1907. title: ""
  1908. list: []
  1909. troubleshooting:
  1910. problems:
  1911. list: []
  1912. alerts: []
  1913. metrics:
  1914. folding:
  1915. title: Metrics
  1916. enabled: false
  1917. description: ""
  1918. availability: []
  1919. scopes:
  1920. - name: zram device
  1921. description: ""
  1922. labels:
  1923. - name: device
  1924. description: TBD
  1925. metrics:
  1926. - name: mem.zram_usage
  1927. description: ZRAM Memory Usage
  1928. unit: "MiB"
  1929. chart_type: area
  1930. dimensions:
  1931. - name: compressed
  1932. - name: metadata
  1933. - name: mem.zram_savings
  1934. description: ZRAM Memory Savings
  1935. unit: "MiB"
  1936. chart_type: area
  1937. dimensions:
  1938. - name: savings
  1939. - name: original
  1940. - name: mem.zram_ratio
  1941. description: ZRAM Compression Ratio (original to compressed)
  1942. unit: "ratio"
  1943. chart_type: line
  1944. dimensions:
  1945. - name: ratio
  1946. - name: mem.zram_efficiency
  1947. description: ZRAM Efficiency
  1948. unit: "percentage"
  1949. chart_type: line
  1950. dimensions:
  1951. - name: percent
  1952. - meta:
  1953. plugin_name: proc.plugin
  1954. module_name: ipc
  1955. monitored_instance:
  1956. name: Inter Process Communication
  1957. link: ""
  1958. categories:
  1959. - data-collection.linux-systems.ipc-metrics
  1960. icon_filename: "network-wired.svg"
  1961. related_resources:
  1962. integrations:
  1963. list: []
  1964. info_provided_to_referring_integrations:
  1965. description: ""
  1966. keywords:
  1967. - ipc
  1968. - semaphores
  1969. - shared memory
  1970. most_popular: false
  1971. overview:
  1972. data_collection:
  1973. metrics_description: |
  1974. IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
  1975. other and synchronize their actions.
  1976. This collector exposes information about:
  1977. - Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
  1978. allows messages to be placed onto a queue and read at a later time.
  1979. - Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
  1980. reading/writing into shared memory segments.
  1981. - Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
  1982. processes are trying to access a single shared resource, semaphores can ensure that only one process
  1983. accesses the resource at a given time.
  1984. method_description: ""
  1985. supported_platforms:
  1986. include: []
  1987. exclude: []
  1988. multi_instance: false
  1989. additional_permissions:
  1990. description: ""
  1991. default_behavior:
  1992. auto_detection:
  1993. description: ""
  1994. limits:
  1995. description: ""
  1996. performance_impact:
  1997. description: ""
  1998. setup:
  1999. prerequisites:
  2000. list: []
  2001. configuration:
  2002. file:
  2003. name: ""
  2004. description: ""
  2005. options:
  2006. description: ""
  2007. folding:
  2008. title: ""
  2009. enabled: true
  2010. list: []
  2011. examples:
  2012. folding:
  2013. enabled: true
  2014. title: ""
  2015. list: []
  2016. troubleshooting:
  2017. problems:
  2018. list: []
  2019. alerts:
  2020. - name: semaphores_used
  2021. link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf
  2022. metric: system.ipc_semaphores
  2023. info: IPC semaphore utilization
  2024. os: "linux"
  2025. - name: semaphore_arrays_used
  2026. link: https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf
  2027. metric: system.ipc_semaphore_arrays
  2028. info: IPC semaphore arrays utilization
  2029. os: "linux"
  2030. metrics:
  2031. folding:
  2032. title: Metrics
  2033. enabled: false
  2034. description: ""
  2035. availability: []
  2036. scopes:
  2037. - name: global
  2038. description: ""
  2039. labels: []
  2040. metrics:
  2041. - name: system.ipc_semaphores
  2042. description: IPC Semaphores
  2043. unit: "semaphores"
  2044. chart_type: area
  2045. dimensions:
  2046. - name: semaphores
  2047. - name: system.ipc_semaphore_arrays
  2048. description: IPC Semaphore Arrays
  2049. unit: "arrays"
  2050. chart_type: area
  2051. dimensions:
  2052. - name: arrays
  2053. - name: system.message_queue_message
  2054. description: IPC Message Queue Number of Messages
  2055. unit: "messages"
  2056. chart_type: stacked
  2057. dimensions:
  2058. - name: a dimension per queue
  2059. - name: system.message_queue_bytes
  2060. description: IPC Message Queue Used Bytes
  2061. unit: "bytes"
  2062. chart_type: stacked
  2063. dimensions:
  2064. - name: a dimension per queue
  2065. - name: system.shared_memory_segments
  2066. description: IPC Shared Memory Number of Segments
  2067. unit: "segments"
  2068. chart_type: stacked
  2069. dimensions:
  2070. - name: segments
  2071. - name: system.shared_memory_bytes
  2072. description: IPC Shared Memory Used Bytes
  2073. unit: "bytes"
  2074. chart_type: stacked
  2075. dimensions:
  2076. - name: bytes
  2077. - meta:
  2078. plugin_name: proc.plugin
  2079. module_name: /proc/diskstats
  2080. monitored_instance:
  2081. name: Disk Statistics
  2082. link: ""
  2083. categories:
  2084. - data-collection.linux-systems.disk-metrics
  2085. icon_filename: "hard-drive.svg"
  2086. related_resources:
  2087. integrations:
  2088. list: []
  2089. info_provided_to_referring_integrations:
  2090. description: ""
  2091. keywords:
  2092. - disk
  2093. - disks
  2094. - io
  2095. - bcache
  2096. - block devices
  2097. most_popular: false
  2098. overview:
  2099. data_collection:
  2100. metrics_description: |
  2101. Detailed statistics for each of your system's disk devices and partitions.
  2102. The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
  2103. Get valuable insight into how your disks are performing and where potential bottlenecks might be.
  2104. method_description: ""
  2105. supported_platforms:
  2106. include: []
  2107. exclude: []
  2108. multi_instance: true
  2109. additional_permissions:
  2110. description: ""
  2111. default_behavior:
  2112. auto_detection:
  2113. description: ""
  2114. limits:
  2115. description: ""
  2116. performance_impact:
  2117. description: ""
  2118. setup:
  2119. prerequisites:
  2120. list: []
  2121. configuration:
  2122. file:
  2123. name: ""
  2124. description: ""
  2125. options:
  2126. description: ""
  2127. folding:
  2128. title: ""
  2129. enabled: true
  2130. list: []
  2131. examples:
  2132. folding:
  2133. enabled: true
  2134. title: ""
  2135. list: []
  2136. troubleshooting:
  2137. problems:
  2138. list: []
  2139. alerts:
  2140. - name: 10min_disk_backlog
  2141. link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf
  2142. metric: disk.backlog
  2143. info: average backlog size of the ${label:device} disk over the last 10 minutes
  2144. os: "linux"
  2145. - name: 10min_disk_utilization
  2146. link: https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf
  2147. metric: disk.util
  2148. info: average percentage of time ${label:device} disk was busy over the last 10 minutes
  2149. os: "linux freebsd"
  2150. - name: bcache_cache_dirty
  2151. link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
  2152. metric: disk.bcache_cache_alloc
  2153. info: percentage of cache space used for dirty data and metadata (this usually means your SSD cache is too small)
  2154. - name: bcache_cache_errors
  2155. link: https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf
  2156. metric: disk.bcache_cache_read_races
  2157. info:
  2158. number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is
  2159. reread from the backing device)
  2160. metrics:
  2161. folding:
  2162. title: Metrics
  2163. enabled: false
  2164. description: ""
  2165. availability: []
  2166. scopes:
  2167. - name: global
  2168. description: ""
  2169. labels: []
  2170. metrics:
  2171. - name: system.io
  2172. description: Disk I/O
  2173. unit: "KiB/s"
  2174. chart_type: area
  2175. dimensions:
  2176. - name: in
  2177. - name: out
  2178. - name: disk
  2179. description: ""
  2180. labels:
  2181. - name: device
  2182. description: TBD
  2183. - name: mount_point
  2184. description: TBD
  2185. - name: device_type
  2186. description: TBD
  2187. metrics:
  2188. - name: disk.io
  2189. description: Disk I/O Bandwidth
  2190. unit: "KiB/s"
  2191. chart_type: area
  2192. dimensions:
  2193. - name: reads
  2194. - name: writes
  2195. - name: disk_ext.io
  2196. description: Amount of Discarded Data
  2197. unit: "KiB/s"
  2198. chart_type: area
  2199. dimensions:
  2200. - name: discards
  2201. - name: disk.ops
  2202. description: Disk Completed I/O Operations
  2203. unit: "operations/s"
  2204. chart_type: line
  2205. dimensions:
  2206. - name: reads
  2207. - name: writes
  2208. - name: disk_ext.ops
  2209. description: Disk Completed Extended I/O Operations
  2210. unit: "operations/s"
  2211. chart_type: line
  2212. dimensions:
  2213. - name: discards
  2214. - name: flushes
  2215. - name: disk.qops
  2216. description: Disk Current I/O Operations
  2217. unit: "operations"
  2218. chart_type: line
  2219. dimensions:
  2220. - name: operations
  2221. - name: disk.backlog
  2222. description: Disk Backlog
  2223. unit: "milliseconds"
  2224. chart_type: area
  2225. dimensions:
  2226. - name: backlog
  2227. - name: disk.busy
  2228. description: Disk Busy Time
  2229. unit: "milliseconds"
  2230. chart_type: area
  2231. dimensions:
  2232. - name: busy
  2233. - name: disk.util
  2234. description: Disk Utilization Time
  2235. unit: "% of time working"
  2236. chart_type: area
  2237. dimensions:
  2238. - name: utilization
  2239. - name: disk.mops
  2240. description: Disk Merged Operations
  2241. unit: "merged operations/s"
  2242. chart_type: line
  2243. dimensions:
  2244. - name: reads
  2245. - name: writes
  2246. - name: disk_ext.mops
  2247. description: Disk Merged Discard Operations
  2248. unit: "merged operations/s"
  2249. chart_type: line
  2250. dimensions:
  2251. - name: discards
  2252. - name: disk.iotime
  2253. description: Disk Total I/O Time
  2254. unit: "milliseconds/s"
  2255. chart_type: line
  2256. dimensions:
  2257. - name: reads
  2258. - name: writes
  2259. - name: disk_ext.iotime
  2260. description: Disk Total I/O Time for Extended Operations
  2261. unit: "milliseconds/s"
  2262. chart_type: line
  2263. dimensions:
  2264. - name: discards
  2265. - name: flushes
  2266. - name: disk.await
  2267. description: Average Completed I/O Operation Time
  2268. unit: "milliseconds/operation"
  2269. chart_type: line
  2270. dimensions:
  2271. - name: reads
  2272. - name: writes
  2273. - name: disk_ext.await
  2274. description: Average Completed Extended I/O Operation Time
  2275. unit: "milliseconds/operation"
  2276. chart_type: line
  2277. dimensions:
  2278. - name: discards
  2279. - name: flushes
  2280. - name: disk.avgsz
  2281. description: Average Completed I/O Operation Bandwidth
  2282. unit: "KiB/operation"
  2283. chart_type: area
  2284. dimensions:
  2285. - name: reads
  2286. - name: writes
  2287. - name: disk_ext.avgsz
  2288. description: Average Amount of Discarded Data
  2289. unit: "KiB/operation"
  2290. chart_type: area
  2291. dimensions:
  2292. - name: discards
  2293. - name: disk.svctm
  2294. description: Average Service Time
  2295. unit: "milliseconds/operation"
  2296. chart_type: line
  2297. dimensions:
  2298. - name: svctm
  2299. - name: disk.bcache_cache_alloc
  2300. description: BCache Cache Allocations
  2301. unit: "percentage"
  2302. chart_type: stacked
  2303. dimensions:
  2304. - name: unused
  2305. - name: dirty
  2306. - name: clean
  2307. - name: metadata
  2308. - name: undefined
  2309. - name: disk.bcache_hit_ratio
  2310. description: BCache Cache Hit Ratio
  2311. unit: "percentage"
  2312. chart_type: line
  2313. dimensions:
  2314. - name: 5min
  2315. - name: 1hour
  2316. - name: 1day
  2317. - name: ever
  2318. - name: disk.bcache_rates
  2319. description: BCache Rates
  2320. unit: "KiB/s"
  2321. chart_type: area
  2322. dimensions:
  2323. - name: congested
  2324. - name: writeback
  2325. - name: disk.bcache_size
  2326. description: BCache Cache Sizes
  2327. unit: "MiB"
  2328. chart_type: area
  2329. dimensions:
  2330. - name: dirty
  2331. - name: disk.bcache_usage
  2332. description: BCache Cache Usage
  2333. unit: "percentage"
  2334. chart_type: area
  2335. dimensions:
  2336. - name: avail
  2337. - name: disk.bcache_cache_read_races
  2338. description: BCache Cache Read Races
  2339. unit: "operations/s"
  2340. chart_type: line
  2341. dimensions:
  2342. - name: races
  2343. - name: errors
  2344. - name: disk.bcache
  2345. description: BCache Cache I/O Operations
  2346. unit: "operations/s"
  2347. chart_type: line
  2348. dimensions:
  2349. - name: hits
  2350. - name: misses
  2351. - name: collisions
  2352. - name: readaheads
  2353. - name: disk.bcache_bypass
  2354. description: BCache Cache Bypass I/O Operations
  2355. unit: "operations/s"
  2356. chart_type: line
  2357. dimensions:
  2358. - name: hits
  2359. - name: misses
  2360. - meta:
  2361. plugin_name: proc.plugin
  2362. module_name: /proc/mdstat
  2363. monitored_instance:
  2364. name: MD RAID
  2365. link: ""
  2366. categories:
  2367. - data-collection.linux-systems.disk-metrics
  2368. icon_filename: "hard-drive.svg"
  2369. related_resources:
  2370. integrations:
  2371. list: []
  2372. info_provided_to_referring_integrations:
  2373. description: ""
  2374. keywords:
  2375. - raid
  2376. - mdadm
  2377. - mdstat
  2378. - raid
  2379. most_popular: false
  2380. overview:
  2381. data_collection:
  2382. metrics_description: "This integration monitors the status of MD RAID devices."
  2383. method_description: ""
  2384. supported_platforms:
  2385. include: []
  2386. exclude: []
  2387. multi_instance: true
  2388. additional_permissions:
  2389. description: ""
  2390. default_behavior:
  2391. auto_detection:
  2392. description: ""
  2393. limits:
  2394. description: ""
  2395. performance_impact:
  2396. description: ""
  2397. setup:
  2398. prerequisites:
  2399. list: []
  2400. configuration:
  2401. file:
  2402. name: ""
  2403. description: ""
  2404. options:
  2405. description: ""
  2406. folding:
  2407. title: ""
  2408. enabled: true
  2409. list: []
  2410. examples:
  2411. folding:
  2412. enabled: true
  2413. title: ""
  2414. list: []
  2415. troubleshooting:
  2416. problems:
  2417. list: []
  2418. alerts:
  2419. - name: mdstat_last_collected
  2420. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2421. metric: md.disks
  2422. info: number of seconds since the last successful data collection
  2423. - name: mdstat_disks
  2424. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2425. metric: md.disks
  2426. info:
  2427. number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded.
  2428. - name: mdstat_mismatch_cnt
  2429. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2430. metric: md.mismatch_cnt
  2431. info: number of unsynchronized blocks for the ${label:device} ${label:raid_level} array
  2432. - name: mdstat_nonredundant_last_collected
  2433. link: https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf
  2434. metric: md.nonredundant
  2435. info: number of seconds since the last successful data collection
  2436. metrics:
  2437. folding:
  2438. title: Metrics
  2439. enabled: false
  2440. description: ""
  2441. availability: []
  2442. scopes:
  2443. - name: global
  2444. description: ""
  2445. labels: []
  2446. metrics:
  2447. - name: md.health
  2448. description: Faulty Devices In MD
  2449. unit: "failed disks"
  2450. chart_type: line
  2451. dimensions:
  2452. - name: a dimension per md array
  2453. - name: md array
  2454. description: ""
  2455. labels:
  2456. - name: device
  2457. description: TBD
  2458. - name: raid_level
  2459. description: TBD
  2460. metrics:
  2461. - name: md.disks
  2462. description: Disks Stats
  2463. unit: "disks"
  2464. chart_type: stacked
  2465. dimensions:
  2466. - name: inuse
  2467. - name: down
  2468. - name: md.mismatch_cnt
  2469. description: Mismatch Count
  2470. unit: "unsynchronized blocks"
  2471. chart_type: line
  2472. dimensions:
  2473. - name: count
  2474. - name: md.status
  2475. description: Current Status
  2476. unit: "percent"
  2477. chart_type: line
  2478. dimensions:
  2479. - name: check
  2480. - name: resync
  2481. - name: recovery
  2482. - name: reshape
  2483. - name: md.expected_time_until_operation_finish
  2484. description: Approximate Time Until Finish
  2485. unit: "seconds"
  2486. chart_type: line
  2487. dimensions:
  2488. - name: finish_in
  2489. - name: md.operation_speed
  2490. description: Operation Speed
  2491. unit: "KiB/s"
  2492. chart_type: line
  2493. dimensions:
  2494. - name: speed
  2495. - name: md.nonredundant
  2496. description: Nonredundant Array Availability
  2497. unit: "boolean"
  2498. chart_type: line
  2499. dimensions:
  2500. - name: available
  2501. - meta:
  2502. plugin_name: proc.plugin
  2503. module_name: /proc/net/dev
  2504. monitored_instance:
  2505. name: Network interfaces
  2506. link: ""
  2507. categories:
  2508. - data-collection.linux-systems.network-metrics
  2509. icon_filename: "network-wired.svg"
  2510. related_resources:
  2511. integrations:
  2512. list: []
  2513. info_provided_to_referring_integrations:
  2514. description: ""
  2515. keywords:
  2516. - network interfaces
  2517. most_popular: false
  2518. overview:
  2519. data_collection:
  2520. metrics_description: "Monitor network interface metrics about bandwidth, state, errors and more."
  2521. method_description: ""
  2522. supported_platforms:
  2523. include: []
  2524. exclude: []
  2525. multi_instance: true
  2526. additional_permissions:
  2527. description: ""
  2528. default_behavior:
  2529. auto_detection:
  2530. description: ""
  2531. limits:
  2532. description: ""
  2533. performance_impact:
  2534. description: ""
  2535. setup:
  2536. prerequisites:
  2537. list: []
  2538. configuration:
  2539. file:
  2540. name: ""
  2541. description: ""
  2542. options:
  2543. description: ""
  2544. folding:
  2545. title: ""
  2546. enabled: true
  2547. list: []
  2548. examples:
  2549. folding:
  2550. enabled: true
  2551. title: ""
  2552. list: []
  2553. troubleshooting:
  2554. problems:
  2555. list: []
  2556. alerts:
  2557. - name: interface_speed
  2558. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2559. metric: net.net
  2560. info: network interface ${label:device} current speed
  2561. os: "*"
  2562. - name: 1m_received_traffic_overflow
  2563. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2564. metric: net.net
  2565. info: average inbound utilization for the network interface ${label:device} over the last minute
  2566. os: "linux"
  2567. - name: 1m_sent_traffic_overflow
  2568. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2569. metric: net.net
  2570. info: average outbound utilization for the network interface ${label:device} over the last minute
  2571. os: "linux"
  2572. - name: inbound_packets_dropped_ratio
  2573. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2574. metric: net.drops
  2575. info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2576. os: "linux"
  2577. - name: outbound_packets_dropped_ratio
  2578. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2579. metric: net.drops
  2580. info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2581. os: "linux"
  2582. - name: wifi_inbound_packets_dropped_ratio
  2583. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2584. metric: net.drops
  2585. info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2586. os: "linux"
  2587. - name: wifi_outbound_packets_dropped_ratio
  2588. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2589. metric: net.drops
  2590. info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
  2591. os: "linux"
  2592. - name: 1m_received_packets_rate
  2593. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2594. metric: net.packets
  2595. info: average number of packets received by the network interface ${label:device} over the last minute
  2596. os: "linux freebsd"
  2597. - name: 10s_received_packets_storm
  2598. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2599. metric: net.packets
  2600. info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute
  2601. os: "linux freebsd"
  2602. - name: 10min_fifo_errors
  2603. link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
  2604. metric: net.fifo
  2605. info: number of FIFO errors for the network interface ${label:device} in the last 10 minutes
  2606. os: "linux"
  2607. metrics:
  2608. folding:
  2609. title: Metrics
  2610. enabled: false
  2611. description: ""
  2612. availability: []
  2613. scopes:
  2614. - name: global
  2615. description: ""
  2616. labels: []
  2617. metrics:
  2618. - name: system.net
  2619. description: Physical Network Interfaces Aggregated Bandwidth
  2620. unit: "kilobits/s"
  2621. chart_type: area
  2622. dimensions:
  2623. - name: received
  2624. - name: sent
  2625. - name: network device
  2626. description: ""
  2627. labels:
  2628. - name: interface_type
  2629. description: TBD
  2630. - name: device
  2631. description: TBD
  2632. metrics:
  2633. - name: net.net
  2634. description: Bandwidth
  2635. unit: "kilobits/s"
  2636. chart_type: area
  2637. dimensions:
  2638. - name: received
  2639. - name: sent
  2640. - name: net.speed
  2641. description: Interface Speed
  2642. unit: "kilobits/s"
  2643. chart_type: line
  2644. dimensions:
  2645. - name: speed
  2646. - name: net.duplex
  2647. description: Interface Duplex State
  2648. unit: "state"
  2649. chart_type: line
  2650. dimensions:
  2651. - name: full
  2652. - name: half
  2653. - name: unknown
  2654. - name: net.operstate
  2655. description: Interface Operational State
  2656. unit: "state"
  2657. chart_type: line
  2658. dimensions:
  2659. - name: up
  2660. - name: down
  2661. - name: notpresent
  2662. - name: lowerlayerdown
  2663. - name: testing
  2664. - name: dormant
  2665. - name: unknown
  2666. - name: net.carrier
  2667. description: Interface Physical Link State
  2668. unit: "state"
  2669. chart_type: line
  2670. dimensions:
  2671. - name: up
  2672. - name: down
  2673. - name: net.mtu
  2674. description: Interface MTU
  2675. unit: "octets"
  2676. chart_type: line
  2677. dimensions:
  2678. - name: mtu
  2679. - name: net.packets
  2680. description: Packets
  2681. unit: "packets/s"
  2682. chart_type: line
  2683. dimensions:
  2684. - name: received
  2685. - name: sent
  2686. - name: multicast
  2687. - name: net.errors
  2688. description: Interface Errors
  2689. unit: "errors/s"
  2690. chart_type: line
  2691. dimensions:
  2692. - name: inbound
  2693. - name: outbound
  2694. - name: net.drops
  2695. description: Interface Drops
  2696. unit: "drops/s"
  2697. chart_type: line
  2698. dimensions:
  2699. - name: inbound
  2700. - name: outbound
  2701. - name: net.fifo
  2702. description: Interface FIFO Buffer Errors
  2703. unit: "errors"
  2704. chart_type: line
  2705. dimensions:
  2706. - name: receive
  2707. - name: transmit
  2708. - name: net.compressed
  2709. description: Compressed Packets
  2710. unit: "packets/s"
  2711. chart_type: line
  2712. dimensions:
  2713. - name: received
  2714. - name: sent
  2715. - name: net.events
  2716. description: Network Interface Events
  2717. unit: "events/s"
  2718. chart_type: line
  2719. dimensions:
  2720. - name: frames
  2721. - name: collisions
  2722. - name: carrier
  2723. - meta:
  2724. plugin_name: proc.plugin
  2725. module_name: /proc/net/wireless
  2726. monitored_instance:
  2727. name: Wireless network interfaces
  2728. link: ""
  2729. categories:
  2730. - data-collection.linux-systems.network-metrics
  2731. icon_filename: "network-wired.svg"
  2732. related_resources:
  2733. integrations:
  2734. list: []
  2735. info_provided_to_referring_integrations:
  2736. description: ""
  2737. keywords:
  2738. - wireless devices
  2739. most_popular: false
  2740. overview:
  2741. data_collection:
  2742. metrics_description: "Monitor wireless devices with metrics about status, link quality, signal level, noise level and more."
  2743. method_description: ""
  2744. supported_platforms:
  2745. include: []
  2746. exclude: []
  2747. multi_instance: true
  2748. additional_permissions:
  2749. description: ""
  2750. default_behavior:
  2751. auto_detection:
  2752. description: ""
  2753. limits:
  2754. description: ""
  2755. performance_impact:
  2756. description: ""
  2757. setup:
  2758. prerequisites:
  2759. list: []
  2760. configuration:
  2761. file:
  2762. name: ""
  2763. description: ""
  2764. options:
  2765. description: ""
  2766. folding:
  2767. title: ""
  2768. enabled: true
  2769. list: []
  2770. examples:
  2771. folding:
  2772. enabled: true
  2773. title: ""
  2774. list: []
  2775. troubleshooting:
  2776. problems:
  2777. list: []
  2778. alerts: []
  2779. metrics:
  2780. folding:
  2781. title: Metrics
  2782. enabled: false
  2783. description: ""
  2784. availability: []
  2785. scopes:
  2786. - name: wireless device
  2787. description: ""
  2788. labels: []
  2789. metrics:
  2790. - name: wireless.status
  2791. description: Internal status reported by interface.
  2792. unit: "status"
  2793. chart_type: line
  2794. dimensions:
  2795. - name: status
  2796. - name: wireless.link_quality
  2797. description: Overall quality of the link. This is an aggregate value, and depends on the driver and hardware.
  2798. unit: "value"
  2799. chart_type: line
  2800. dimensions:
  2801. - name: link_quality
  2802. - name: wireless.signal_level
  2803. description:
  2804. The signal level is the wireless signal power level received by the wireless client. The closer the value is to 0, the stronger the
  2805. signal.
  2806. unit: "dBm"
  2807. chart_type: line
  2808. dimensions:
  2809. - name: signal_level
  2810. - name: wireless.noise_level
  2811. description:
  2812. The noise level indicates the amount of background noise in your environment. The closer the value is to 0, the greater the noise level.
  2813. unit: "dBm"
  2814. chart_type: line
  2815. dimensions:
  2816. - name: noise_level
  2817. - name: wireless.discarded_packets
  2818. description: Packets discarded in the wireless adapter due to wireless-specific problems.
  2819. unit: "packets/s"
  2820. chart_type: line
  2821. dimensions:
  2822. - name: nwid
  2823. - name: crypt
  2824. - name: frag
  2825. - name: retry
  2826. - name: misc
  2827. - name: wireless.missed_beacons
  2828. description: Number of missed beacons.
  2829. unit: "frames/s"
  2830. chart_type: line
  2831. dimensions:
  2832. - name: missed_beacons
  2833. - meta:
  2834. plugin_name: proc.plugin
  2835. module_name: /sys/class/infiniband
  2836. monitored_instance:
  2837. name: InfiniBand
  2838. link: ""
  2839. categories:
  2840. - data-collection.linux-systems.network-metrics
  2841. icon_filename: "network-wired.svg"
  2842. related_resources:
  2843. integrations:
  2844. list: []
  2845. info_provided_to_referring_integrations:
  2846. description: ""
  2847. keywords:
  2848. - infiniband
  2849. - rdma
  2850. most_popular: false
  2851. overview:
  2852. data_collection:
  2853. metrics_description: "This integration monitors InfiniBand network interface statistics."
  2854. method_description: ""
  2855. supported_platforms:
  2856. include: []
  2857. exclude: []
  2858. multi_instance: true
  2859. additional_permissions:
  2860. description: ""
  2861. default_behavior:
  2862. auto_detection:
  2863. description: ""
  2864. limits:
  2865. description: ""
  2866. performance_impact:
  2867. description: ""
  2868. setup:
  2869. prerequisites:
  2870. list: []
  2871. configuration:
  2872. file:
  2873. name: ""
  2874. description: ""
  2875. options:
  2876. description: ""
  2877. folding:
  2878. title: ""
  2879. enabled: true
  2880. list: []
  2881. examples:
  2882. folding:
  2883. enabled: true
  2884. title: ""
  2885. list: []
  2886. troubleshooting:
  2887. problems:
  2888. list: []
  2889. alerts: []
  2890. metrics:
  2891. folding:
  2892. title: Metrics
  2893. enabled: false
  2894. description: ""
  2895. availability: []
  2896. scopes:
  2897. - name: infiniband port
  2898. description: ""
  2899. labels: []
  2900. metrics:
  2901. - name: ib.bytes
  2902. description: Bandwidth usage
  2903. unit: "kilobits/s"
  2904. chart_type: area
  2905. dimensions:
  2906. - name: Received
  2907. - name: Sent
  2908. - name: ib.packets
  2909. description: Packets Statistics
  2910. unit: "packets/s"
  2911. chart_type: area
  2912. dimensions:
  2913. - name: Received
  2914. - name: Sent
  2915. - name: Mcast_rcvd
  2916. - name: Mcast_sent
  2917. - name: Ucast_rcvd
  2918. - name: Ucast_sent
  2919. - name: ib.errors
  2920. description: Error Counters
  2921. unit: "errors/s"
  2922. chart_type: line
  2923. dimensions:
  2924. - name: Pkts_malformated
  2925. - name: Pkts_rcvd_discarded
  2926. - name: Pkts_sent_discarded
  2927. - name: Tick_Wait_to_send
  2928. - name: Pkts_missed_resource
  2929. - name: Buffer_overrun
  2930. - name: Link_Downed
  2931. - name: Link_recovered
  2932. - name: Link_integrity_err
  2933. - name: Link_minor_errors
  2934. - name: Pkts_rcvd_with_EBP
  2935. - name: Pkts_rcvd_discarded_by_switch
  2936. - name: Pkts_sent_discarded_by_switch
  2937. - name: ib.hwerrors
  2938. description: Hardware Errors
  2939. unit: "errors/s"
  2940. chart_type: line
  2941. dimensions:
  2942. - name: Duplicated_packets
  2943. - name: Pkt_Seq_Num_gap
  2944. - name: Ack_timer_expired
  2945. - name: Drop_missing_buffer
  2946. - name: Drop_out_of_sequence
  2947. - name: NAK_sequence_rcvd
  2948. - name: CQE_err_Req
  2949. - name: CQE_err_Resp
  2950. - name: CQE_Flushed_err_Req
  2951. - name: CQE_Flushed_err_Resp
  2952. - name: Remote_access_err_Req
  2953. - name: Remote_access_err_Resp
  2954. - name: Remote_invalid_req
  2955. - name: Local_length_err_Resp
  2956. - name: RNR_NAK_Packets
  2957. - name: CNP_Pkts_ignored
  2958. - name: RoCE_ICRC_Errors
  2959. - name: ib.hwpackets
  2960. description: Hardware Packets Statistics
  2961. unit: "packets/s"
  2962. chart_type: line
  2963. dimensions:
  2964. - name: RoCEv2_Congestion_sent
  2965. - name: RoCEv2_Congestion_rcvd
  2966. - name: IB_Congestion_handled
  2967. - name: ATOMIC_req_rcvd
  2968. - name: Connection_req_rcvd
  2969. - name: Read_req_rcvd
  2970. - name: Write_req_rcvd
  2971. - name: RoCE_retrans_adaptive
  2972. - name: RoCE_retrans_timeout
  2973. - name: RoCE_slow_restart
  2974. - name: RoCE_slow_restart_congestion
  2975. - name: RoCE_slow_restart_count
  2976. - meta:
  2977. plugin_name: proc.plugin
  2978. module_name: /proc/net/netstat
  2979. monitored_instance:
  2980. name: Network statistics
  2981. link: ""
  2982. categories:
  2983. - data-collection.linux-systems.network-metrics
  2984. icon_filename: "network-wired.svg"
  2985. related_resources:
  2986. integrations:
  2987. list: []
  2988. info_provided_to_referring_integrations:
  2989. description: ""
  2990. keywords:
  2991. - ip
  2992. - udp
  2993. - udplite
  2994. - icmp
  2995. - netstat
  2996. - snmp
  2997. most_popular: false
  2998. overview:
  2999. data_collection:
  3000. metrics_description: "This integration provides metrics from the `netstat`, `snmp` and `snmp6` modules."
  3001. method_description: ""
  3002. supported_platforms:
  3003. include: []
  3004. exclude: []
  3005. multi_instance: true
  3006. additional_permissions:
  3007. description: ""
  3008. default_behavior:
  3009. auto_detection:
  3010. description: ""
  3011. limits:
  3012. description: ""
  3013. performance_impact:
  3014. description: ""
  3015. setup:
  3016. prerequisites:
  3017. list: []
  3018. configuration:
  3019. file:
  3020. name: ""
  3021. description: ""
  3022. options:
  3023. description: ""
  3024. folding:
  3025. title: ""
  3026. enabled: true
  3027. list: []
  3028. examples:
  3029. folding:
  3030. enabled: true
  3031. title: ""
  3032. list: []
  3033. troubleshooting:
  3034. problems:
  3035. list: []
  3036. alerts:
  3037. - name: 1m_tcp_syn_queue_drops
  3038. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3039. metric: ip.tcp_syn_queue
  3040. info: average number of SYN requests dropped due to the full TCP SYN queue over the last minute (SYN cookies were not enabled)
  3041. os: "linux"
  3042. - name: 1m_tcp_syn_queue_cookies
  3043. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3044. metric: ip.tcp_syn_queue
  3045. info: average number of sent SYN cookies due to the full TCP SYN queue over the last minute
  3046. os: "linux"
  3047. - name: 1m_tcp_accept_queue_overflows
  3048. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3049. metric: ip.tcp_accept_queue
  3050. info: average number of overflows in the TCP accept queue over the last minute
  3051. os: "linux"
  3052. - name: 1m_tcp_accept_queue_drops
  3053. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf
  3054. metric: ip.tcp_accept_queue
  3055. info: average number of dropped packets in the TCP accept queue over the last minute
  3056. os: "linux"
  3057. - name: tcp_connections
  3058. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf
  3059. metric: ip.tcpsock
  3060. info: TCP connections utilization
  3061. os: "linux"
  3062. - name: 1m_ip_tcp_resets_sent
  3063. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3064. metric: ip.tcphandshake
  3065. info: average number of sent TCP RESETS over the last minute
  3066. os: "linux"
  3067. - name: 10s_ip_tcp_resets_sent
  3068. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3069. metric: ip.tcphandshake
  3070. info:
  3071. average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has
  3072. crashed. Netdata will not send a clear notification for this alarm.
  3073. os: "linux"
  3074. - name: 1m_ip_tcp_resets_received
  3075. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3076. metric: ip.tcphandshake
  3077. info: average number of received TCP RESETS over the last minute
  3078. os: "linux freebsd"
  3079. - name: 10s_ip_tcp_resets_received
  3080. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
  3081. metric: ip.tcphandshake
  3082. info:
  3083. average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed.
  3084. Netdata will not send a clear notification for this alarm.
  3085. os: "linux freebsd"
  3086. - name: 1m_ipv4_udp_receive_buffer_errors
  3087. link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
  3088. metric: ipv4.udperrors
  3089. info: average number of UDP receive buffer errors over the last minute
  3090. os: "linux freebsd"
  3091. - name: 1m_ipv4_udp_send_buffer_errors
  3092. link: https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf
  3093. metric: ipv4.udperrors
  3094. info: average number of UDP send buffer errors over the last minute
  3095. os: "linux"
  3096. metrics:
  3097. folding:
  3098. title: Metrics
  3099. enabled: false
  3100. description: ""
  3101. availability: []
  3102. scopes:
  3103. - name: global
  3104. description: ""
  3105. labels: []
  3106. metrics:
  3107. - name: system.ip
  3108. description: IPv4 Bandwidth
  3109. unit: "kilobits/s"
  3110. chart_type: area
  3111. dimensions:
  3112. - name: received
  3113. - name: sent
  3114. - name: ip.tcpmemorypressures
  3115. description: TCP Memory Pressures
  3116. unit: "events/s"
  3117. chart_type: line
  3118. dimensions:
  3119. - name: pressures
  3120. - name: ip.tcpconnaborts
  3121. description: TCP Connection Aborts
  3122. unit: "connections/s"
  3123. chart_type: line
  3124. dimensions:
  3125. - name: baddata
  3126. - name: userclosed
  3127. - name: nomemory
  3128. - name: timeout
  3129. - name: linger
  3130. - name: failed
  3131. - name: ip.tcpreorders
  3132. description: TCP Reordered Packets by Detection Method
  3133. unit: "packets/s"
  3134. chart_type: line
  3135. dimensions:
  3136. - name: timestamp
  3137. - name: sack
  3138. - name: fack
  3139. - name: reno
  3140. - name: ip.tcpofo
  3141. description: TCP Out-Of-Order Queue
  3142. unit: "packets/s"
  3143. chart_type: line
  3144. dimensions:
  3145. - name: inqueue
  3146. - name: dropped
  3147. - name: merged
  3148. - name: pruned
  3149. - name: ip.tcpsyncookies
  3150. description: TCP SYN Cookies
  3151. unit: "packets/s"
  3152. chart_type: line
  3153. dimensions:
  3154. - name: received
  3155. - name: sent
  3156. - name: failed
  3157. - name: ip.tcp_syn_queue
  3158. description: TCP SYN Queue Issues
  3159. unit: "packets/s"
  3160. chart_type: line
  3161. dimensions:
  3162. - name: drops
  3163. - name: cookies
  3164. - name: ip.tcp_accept_queue
  3165. description: TCP Accept Queue Issues
  3166. unit: "packets/s"
  3167. chart_type: line
  3168. dimensions:
  3169. - name: overflows
  3170. - name: drops
  3171. - name: ip.tcpsock
  3172. description: IPv4 TCP Connections
  3173. unit: "active connections"
  3174. chart_type: line
  3175. dimensions:
  3176. - name: connections
  3177. - name: ip.tcppackets
  3178. description: IPv4 TCP Packets
  3179. unit: "packets/s"
  3180. chart_type: line
  3181. dimensions:
  3182. - name: received
  3183. - name: sent
  3184. - name: ip.tcperrors
  3185. description: IPv4 TCP Errors
  3186. unit: "packets/s"
  3187. chart_type: line
  3188. dimensions:
  3189. - name: InErrs
  3190. - name: InCsumErrors
  3191. - name: RetransSegs
  3192. - name: ip.tcpopens
  3193. description: IPv4 TCP Opens
  3194. unit: "connections/s"
  3195. chart_type: line
  3196. dimensions:
  3197. - name: active
  3198. - name: passive
  3199. - name: ip.tcphandshake
  3200. description: IPv4 TCP Handshake Issues
  3201. unit: "events/s"
  3202. chart_type: line
  3203. dimensions:
  3204. - name: EstabResets
  3205. - name: OutRsts
  3206. - name: AttemptFails
  3207. - name: SynRetrans
  3208. - name: ipv4.packets
  3209. description: IPv4 Packets
  3210. unit: "packets/s"
  3211. chart_type: line
  3212. dimensions:
  3213. - name: received
  3214. - name: sent
  3215. - name: forwarded
  3216. - name: delivered
  3217. - name: ipv4.errors
  3218. description: IPv4 Errors
  3219. unit: "packets/s"
  3220. chart_type: line
  3221. dimensions:
  3222. - name: InDiscards
  3223. - name: OutDiscards
  3224. - name: InNoRoutes
  3225. - name: OutNoRoutes
  3226. - name: InHdrErrors
  3227. - name: InAddrErrors
  3228. - name: InTruncatedPkts
  3229. - name: InCsumErrors
  3230. - name: ipv4.bcast
  3231. description: IP Broadcast Bandwidth
  3232. unit: "kilobits/s"
  3233. chart_type: area
  3234. dimensions:
  3235. - name: received
  3236. - name: sent
  3237. - name: ipv4.bcastpkts
  3238. description: IP Broadcast Packets
  3239. unit: "packets/s"
  3240. chart_type: line
  3241. dimensions:
  3242. - name: received
  3243. - name: sent
  3244. - name: ipv4.mcast
  3245. description: IPv4 Multicast Bandwidth
  3246. unit: "kilobits/s"
  3247. chart_type: area
  3248. dimensions:
  3249. - name: received
  3250. - name: sent
  3251. - name: ipv4.mcastpkts
  3252. description: IP Multicast Packets
  3253. unit: "packets/s"
  3254. chart_type: line
  3255. dimensions:
  3256. - name: received
  3257. - name: sent
  3258. - name: ipv4.icmp
  3259. description: IPv4 ICMP Packets
  3260. unit: "packets/s"
  3261. chart_type: line
  3262. dimensions:
  3263. - name: received
  3264. - name: sent
  3265. - name: ipv4.icmpmsg
  3266. description: IPv4 ICMP Messages
  3267. unit: "packets/s"
  3268. chart_type: line
  3269. dimensions:
  3270. - name: InEchoReps
  3271. - name: OutEchoReps
  3272. - name: InDestUnreachs
  3273. - name: OutDestUnreachs
  3274. - name: InRedirects
  3275. - name: OutRedirects
  3276. - name: InEchos
  3277. - name: OutEchos
  3278. - name: InRouterAdvert
  3279. - name: OutRouterAdvert
  3280. - name: InRouterSelect
  3281. - name: OutRouterSelect
  3282. - name: InTimeExcds
  3283. - name: OutTimeExcds
  3284. - name: InParmProbs
  3285. - name: OutParmProbs
  3286. - name: InTimestamps
  3287. - name: OutTimestamps
  3288. - name: InTimestampReps
  3289. - name: OutTimestampReps
  3290. - name: ipv4.icmp_errors
  3291. description: IPv4 ICMP Errors
  3292. unit: "packets/s"
  3293. chart_type: line
  3294. dimensions:
  3295. - name: InErrors
  3296. - name: OutErrors
  3297. - name: InCsumErrors
  3298. - name: ipv4.udppackets
  3299. description: IPv4 UDP Packets
  3300. unit: "packets/s"
  3301. chart_type: line
  3302. dimensions:
  3303. - name: received
  3304. - name: sent
  3305. - name: ipv4.udperrors
  3306. description: IPv4 UDP Errors
  3307. unit: "events/s"
  3308. chart_type: line
  3309. dimensions:
  3310. - name: RcvbufErrors
  3311. - name: SndbufErrors
  3312. - name: InErrors
  3313. - name: NoPorts
  3314. - name: InCsumErrors
  3315. - name: IgnoredMulti
  3316. - name: ipv4.udplite
  3317. description: IPv4 UDPLite Packets
  3318. unit: "packets/s"
  3319. chart_type: line
  3320. dimensions:
  3321. - name: received
  3322. - name: sent
  3323. - name: ipv4.udplite_errors
  3324. description: IPv4 UDPLite Errors
  3325. unit: "packets/s"
  3326. chart_type: line
  3327. dimensions:
  3328. - name: RcvbufErrors
  3329. - name: SndbufErrors
  3330. - name: InErrors
  3331. - name: NoPorts
  3332. - name: InCsumErrors
  3333. - name: IgnoredMulti
  3334. - name: ipv4.ecnpkts
  3335. description: IP ECN Statistics
  3336. unit: "packets/s"
  3337. chart_type: line
  3338. dimensions:
  3339. - name: CEP
  3340. - name: NoECTP
  3341. - name: ECTP0
  3342. - name: ECTP1
  3343. - name: ipv4.fragsin
  3344. description: IPv4 Fragments Reassembly
  3345. unit: "packets/s"
  3346. chart_type: line
  3347. dimensions:
  3348. - name: ok
  3349. - name: failed
  3350. - name: all
  3351. - name: ipv4.fragsout
  3352. description: IPv4 Fragments Sent
  3353. unit: "packets/s"
  3354. chart_type: line
  3355. dimensions:
  3356. - name: ok
  3357. - name: failed
  3358. - name: created
  3359. - name: system.ipv6
  3360. description: IPv6 Bandwidth
  3361. unit: "kilobits/s"
  3362. chart_type: area
  3363. dimensions:
  3364. - name: received
  3365. - name: sent
  3366. - name: ipv6.packets
  3367. description: IPv6 Packets
  3368. unit: "packets/s"
  3369. chart_type: line
  3370. dimensions:
  3371. - name: received
  3372. - name: sent
  3373. - name: forwarded
  3374. - name: delivers
  3375. - name: ipv6.errors
  3376. description: IPv6 Errors
  3377. unit: "packets/s"
  3378. chart_type: line
  3379. dimensions:
  3380. - name: InDiscards
  3381. - name: OutDiscards
  3382. - name: InHdrErrors
  3383. - name: InAddrErrors
  3384. - name: InUnknownProtos
  3385. - name: InTooBigErrors
  3386. - name: InTruncatedPkts
  3387. - name: InNoRoutes
  3388. - name: OutNoRoutes
  3389. - name: ipv6.bcast
  3390. description: IPv6 Broadcast Bandwidth
  3391. unit: "kilobits/s"
  3392. chart_type: area
  3393. dimensions:
  3394. - name: received
  3395. - name: sent
  3396. - name: ipv6.mcast
  3397. description: IPv6 Multicast Bandwidth
  3398. unit: "kilobits/s"
  3399. chart_type: area
  3400. dimensions:
  3401. - name: received
  3402. - name: sent
  3403. - name: ipv6.mcastpkts
  3404. description: IPv6 Multicast Packets
  3405. unit: "packets/s"
  3406. chart_type: line
  3407. dimensions:
  3408. - name: received
  3409. - name: sent
  3410. - name: ipv6.udppackets
  3411. description: IPv6 UDP Packets
  3412. unit: "packets/s"
  3413. chart_type: line
  3414. dimensions:
  3415. - name: received
  3416. - name: sent
  3417. - name: ipv6.udperrors
  3418. description: IPv6 UDP Errors
  3419. unit: "events/s"
  3420. chart_type: line
  3421. dimensions:
  3422. - name: RcvbufErrors
  3423. - name: SndbufErrors
  3424. - name: InErrors
  3425. - name: NoPorts
  3426. - name: InCsumErrors
  3427. - name: IgnoredMulti
  3428. - name: ipv6.udplitepackets
  3429. description: IPv6 UDPlite Packets
  3430. unit: "packets/s"
  3431. chart_type: line
  3432. dimensions:
  3433. - name: received
  3434. - name: sent
  3435. - name: ipv6.udpliteerrors
  3436. description: IPv6 UDP Lite Errors
  3437. unit: "events/s"
  3438. chart_type: line
  3439. dimensions:
  3440. - name: RcvbufErrors
  3441. - name: SndbufErrors
  3442. - name: InErrors
  3443. - name: NoPorts
  3444. - name: InCsumErrors
  3445. - name: ipv6.icmp
  3446. description: IPv6 ICMP Messages
  3447. unit: "messages/s"
  3448. chart_type: line
  3449. dimensions:
  3450. - name: received
  3451. - name: sent
  3452. - name: ipv6.icmpredir
  3453. description: IPv6 ICMP Redirects
  3454. unit: "redirects/s"
  3455. chart_type: line
  3456. dimensions:
  3457. - name: received
  3458. - name: sent
  3459. - name: ipv6.icmperrors
  3460. description: IPv6 ICMP Errors
  3461. unit: "errors/s"
  3462. chart_type: line
  3463. dimensions:
  3464. - name: InErrors
  3465. - name: OutErrors
  3466. - name: InCsumErrors
  3467. - name: InDestUnreachs
  3468. - name: InPktTooBigs
  3469. - name: InTimeExcds
  3470. - name: InParmProblems
  3471. - name: OutDestUnreachs
  3472. - name: OutPktTooBigs
  3473. - name: OutTimeExcds
  3474. - name: OutParmProblems
  3475. - name: ipv6.icmpechos
  3476. description: IPv6 ICMP Echo
  3477. unit: "messages/s"
  3478. chart_type: line
  3479. dimensions:
  3480. - name: InEchos
  3481. - name: OutEchos
  3482. - name: InEchoReplies
  3483. - name: OutEchoReplies
  3484. - name: ipv6.groupmemb
  3485. description: IPv6 ICMP Group Membership
  3486. unit: "messages/s"
  3487. chart_type: line
  3488. dimensions:
  3489. - name: InQueries
  3490. - name: OutQueries
  3491. - name: InResponses
  3492. - name: OutResponses
  3493. - name: InReductions
  3494. - name: OutReductions
  3495. - name: ipv6.icmprouter
  3496. description: IPv6 Router Messages
  3497. unit: "messages/s"
  3498. chart_type: line
  3499. dimensions:
  3500. - name: InSolicits
  3501. - name: OutSolicits
  3502. - name: InAdvertisements
  3503. - name: OutAdvertisements
  3504. - name: ipv6.icmpneighbor
  3505. description: IPv6 Neighbor Messages
  3506. unit: "messages/s"
  3507. chart_type: line
  3508. dimensions:
  3509. - name: InSolicits
  3510. - name: OutSolicits
  3511. - name: InAdvertisements
  3512. - name: OutAdvertisements
  3513. - name: ipv6.icmpmldv2
  3514. description: IPv6 ICMP MLDv2 Reports
  3515. unit: "reports/s"
  3516. chart_type: line
  3517. dimensions:
  3518. - name: received
  3519. - name: sent
  3520. - name: ipv6.icmptypes
  3521. description: IPv6 ICMP Types
  3522. unit: "messages/s"
  3523. chart_type: line
  3524. dimensions:
  3525. - name: InType1
  3526. - name: InType128
  3527. - name: InType129
  3528. - name: InType136
  3529. - name: OutType1
  3530. - name: OutType128
  3531. - name: OutType129
  3532. - name: OutType133
  3533. - name: OutType135
  3534. - name: OutType143
  3535. - name: ipv6.ect
  3536. description: IPv6 ECT Packets
  3537. unit: "packets/s"
  3538. chart_type: line
  3539. dimensions:
  3540. - name: InNoECTPkts
  3541. - name: InECT1Pkts
  3542. - name: InECT0Pkts
  3543. - name: InCEPkts
  3544. - name: ipv6.ect
  3545. description: IPv6 ECT Packets
  3546. unit: "packets/s"
  3547. chart_type: line
  3548. dimensions:
  3549. - name: InNoECTPkts
  3550. - name: InECT1Pkts
  3551. - name: InECT0Pkts
  3552. - name: InCEPkts
  3553. - name: ipv6.fragsin
  3554. description: IPv6 Fragments Reassembly
  3555. unit: "packets/s"
  3556. chart_type: line
  3557. dimensions:
  3558. - name: ok
  3559. - name: failed
  3560. - name: timeout
  3561. - name: all
  3562. - name: ipv6.fragsout
  3563. description: IPv6 Fragments Sent
  3564. unit: "packets/s"
  3565. chart_type: line
  3566. dimensions:
  3567. - name: ok
  3568. - name: failed
  3569. - name: all
  3570. - meta:
  3571. plugin_name: proc.plugin
  3572. module_name: /proc/net/sockstat
  3573. monitored_instance:
  3574. name: Socket statistics
  3575. link: ""
  3576. categories:
  3577. - data-collection.linux-systems.network-metrics
  3578. icon_filename: "network-wired.svg"
  3579. related_resources:
  3580. integrations:
  3581. list: []
  3582. info_provided_to_referring_integrations:
  3583. description: ""
  3584. keywords:
  3585. - sockets
  3586. most_popular: false
  3587. overview:
  3588. data_collection:
  3589. metrics_description: "This integration provides socket statistics."
  3590. method_description: ""
  3591. supported_platforms:
  3592. include: []
  3593. exclude: []
  3594. multi_instance: true
  3595. additional_permissions:
  3596. description: ""
  3597. default_behavior:
  3598. auto_detection:
  3599. description: ""
  3600. limits:
  3601. description: ""
  3602. performance_impact:
  3603. description: ""
  3604. setup:
  3605. prerequisites:
  3606. list: []
  3607. configuration:
  3608. file:
  3609. name: ""
  3610. description: ""
  3611. options:
  3612. description: ""
  3613. folding:
  3614. title: ""
  3615. enabled: true
  3616. list: []
  3617. examples:
  3618. folding:
  3619. enabled: true
  3620. title: ""
  3621. list: []
  3622. troubleshooting:
  3623. problems:
  3624. list: []
  3625. alerts:
  3626. - name: tcp_orphans
  3627. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_orphans.conf
  3628. metric: ipv4.sockstat_tcp_sockets
  3629. info: orphan IPv4 TCP sockets utilization
  3630. os: "linux"
  3631. - name: tcp_memory
  3632. link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_mem.conf
  3633. metric: ipv4.sockstat_tcp_mem
  3634. info: TCP memory utilization
  3635. os: "linux"
  3636. metrics:
  3637. folding:
  3638. title: Metrics
  3639. enabled: false
  3640. description: ""
  3641. availability: []
  3642. scopes:
  3643. - name: global
  3644. description: ""
  3645. labels: []
  3646. metrics:
  3647. - name: ip.sockstat_sockets
  3648. description: Sockets used for all address families
  3649. unit: "sockets"
  3650. chart_type: line
  3651. dimensions:
  3652. - name: used
  3653. - name: ipv4.sockstat_tcp_sockets
  3654. description: IPv4 TCP Sockets
  3655. unit: "sockets"
  3656. chart_type: line
  3657. dimensions:
  3658. - name: alloc
  3659. - name: orphan
  3660. - name: inuse
  3661. - name: timewait
  3662. - name: ipv4.sockstat_tcp_mem
  3663. description: IPv4 TCP Sockets Memory
  3664. unit: "KiB"
  3665. chart_type: area
  3666. dimensions:
  3667. - name: mem
  3668. - name: ipv4.sockstat_udp_sockets
  3669. description: IPv4 UDP Sockets
  3670. unit: "sockets"
  3671. chart_type: line
  3672. dimensions:
  3673. - name: inuse
  3674. - name: ipv4.sockstat_udp_mem
  3675. description: IPv4 UDP Sockets Memory
  3676. unit: "KiB"
  3677. chart_type: area
  3678. dimensions:
  3679. - name: mem
  3680. - name: ipv4.sockstat_udplite_sockets
  3681. description: IPv4 UDPLITE Sockets
  3682. unit: "sockets"
  3683. chart_type: line
  3684. dimensions:
  3685. - name: inuse
  3686. - name: ipv4.sockstat_raw_sockets
  3687. description: IPv4 RAW Sockets
  3688. unit: "sockets"
  3689. chart_type: line
  3690. dimensions:
  3691. - name: inuse
  3692. - name: ipv4.sockstat_frag_sockets
  3693. description: IPv4 FRAG Sockets
  3694. unit: "fragments"
  3695. chart_type: line
  3696. dimensions:
  3697. - name: inuse
  3698. - name: ipv4.sockstat_frag_mem
  3699. description: IPv4 FRAG Sockets Memory
  3700. unit: "KiB"
  3701. chart_type: area
  3702. dimensions:
  3703. - name: mem
  3704. - meta:
  3705. plugin_name: proc.plugin
  3706. module_name: /proc/net/sockstat6
  3707. monitored_instance:
  3708. name: IPv6 Socket Statistics
  3709. link: ""
  3710. categories:
  3711. - data-collection.linux-systems.network-metrics
  3712. icon_filename: "network-wired.svg"
  3713. related_resources:
  3714. integrations:
  3715. list: []
  3716. info_provided_to_referring_integrations:
  3717. description: ""
  3718. keywords:
  3719. - ipv6 sockets
  3720. most_popular: false
  3721. overview:
  3722. data_collection:
  3723. metrics_description: "This integration provides IPv6 socket statistics."
  3724. method_description: ""
  3725. supported_platforms:
  3726. include: []
  3727. exclude: []
  3728. multi_instance: true
  3729. additional_permissions:
  3730. description: ""
  3731. default_behavior:
  3732. auto_detection:
  3733. description: ""
  3734. limits:
  3735. description: ""
  3736. performance_impact:
  3737. description: ""
  3738. setup:
  3739. prerequisites:
  3740. list: []
  3741. configuration:
  3742. file:
  3743. name: ""
  3744. description: ""
  3745. options:
  3746. description: ""
  3747. folding:
  3748. title: ""
  3749. enabled: true
  3750. list: []
  3751. examples:
  3752. folding:
  3753. enabled: true
  3754. title: ""
  3755. list: []
  3756. troubleshooting:
  3757. problems:
  3758. list: []
  3759. alerts: []
  3760. metrics:
  3761. folding:
  3762. title: Metrics
  3763. enabled: false
  3764. description: ""
  3765. availability: []
  3766. scopes:
  3767. - name: global
  3768. description: ""
  3769. labels: []
  3770. metrics:
  3771. - name: ipv6.sockstat6_tcp_sockets
  3772. description: IPv6 TCP Sockets
  3773. unit: "sockets"
  3774. chart_type: line
  3775. dimensions:
  3776. - name: inuse
  3777. - name: ipv6.sockstat6_udp_sockets
  3778. description: IPv6 UDP Sockets
  3779. unit: "sockets"
  3780. chart_type: line
  3781. dimensions:
  3782. - name: inuse
  3783. - name: ipv6.sockstat6_udplite_sockets
  3784. description: IPv6 UDPLITE Sockets
  3785. unit: "sockets"
  3786. chart_type: line
  3787. dimensions:
  3788. - name: inuse
  3789. - name: ipv6.sockstat6_raw_sockets
  3790. description: IPv6 RAW Sockets
  3791. unit: "sockets"
  3792. chart_type: line
  3793. dimensions:
  3794. - name: inuse
  3795. - name: ipv6.sockstat6_frag_sockets
  3796. description: IPv6 FRAG Sockets
  3797. unit: "fragments"
  3798. chart_type: line
  3799. dimensions:
  3800. - name: inuse
  3801. - meta:
  3802. plugin_name: proc.plugin
  3803. module_name: /proc/net/ip_vs_stats
  3804. monitored_instance:
  3805. name: IP Virtual Server
  3806. link: ""
  3807. categories:
  3808. - data-collection.linux-systems.network-metrics
  3809. icon_filename: "network-wired.svg"
  3810. related_resources:
  3811. integrations:
  3812. list: []
  3813. info_provided_to_referring_integrations:
  3814. description: ""
  3815. keywords:
  3816. - ip virtual server
  3817. most_popular: false
  3818. overview:
  3819. data_collection:
  3820. metrics_description: "This integration monitors IP Virtual Server statistics."
  3821. method_description: ""
  3822. supported_platforms:
  3823. include: []
  3824. exclude: []
  3825. multi_instance: true
  3826. additional_permissions:
  3827. description: ""
  3828. default_behavior:
  3829. auto_detection:
  3830. description: ""
  3831. limits:
  3832. description: ""
  3833. performance_impact:
  3834. description: ""
  3835. setup:
  3836. prerequisites:
  3837. list: []
  3838. configuration:
  3839. file:
  3840. name: ""
  3841. description: ""
  3842. options:
  3843. description: ""
  3844. folding:
  3845. title: ""
  3846. enabled: true
  3847. list: []
  3848. examples:
  3849. folding:
  3850. enabled: true
  3851. title: ""
  3852. list: []
  3853. troubleshooting:
  3854. problems:
  3855. list: []
  3856. alerts: []
  3857. metrics:
  3858. folding:
  3859. title: Metrics
  3860. enabled: false
  3861. description: ""
  3862. availability: []
  3863. scopes:
  3864. - name: global
  3865. description: ""
  3866. labels: []
  3867. metrics:
  3868. - name: ipvs.sockets
  3869. description: IPVS New Connections
  3870. unit: "connections/s"
  3871. chart_type: line
  3872. dimensions:
  3873. - name: connections
  3874. - name: ipvs.packets
  3875. description: IPVS Packets
  3876. unit: "packets/s"
  3877. chart_type: line
  3878. dimensions:
  3879. - name: received
  3880. - name: sent
  3881. - name: ipvs.net
  3882. description: IPVS Bandwidth
  3883. unit: "kilobits/s"
  3884. chart_type: area
  3885. dimensions:
  3886. - name: received
  3887. - name: sent
  3888. - meta:
  3889. plugin_name: proc.plugin
  3890. module_name: /proc/net/rpc/nfs
  3891. monitored_instance:
  3892. name: NFS Client
  3893. link: ""
  3894. categories:
  3895. - data-collection.linux-systems.filesystem-metrics.nfs
  3896. icon_filename: "nfs.png"
  3897. related_resources:
  3898. integrations:
  3899. list: []
  3900. info_provided_to_referring_integrations:
  3901. description: ""
  3902. keywords:
  3903. - nfs client
  3904. - filesystem
  3905. most_popular: false
  3906. overview:
  3907. data_collection:
  3908. metrics_description: "This integration provides statistics from the Linux kernel's NFS Client."
  3909. method_description: ""
  3910. supported_platforms:
  3911. include: []
  3912. exclude: []
  3913. multi_instance: true
  3914. additional_permissions:
  3915. description: ""
  3916. default_behavior:
  3917. auto_detection:
  3918. description: ""
  3919. limits:
  3920. description: ""
  3921. performance_impact:
  3922. description: ""
  3923. setup:
  3924. prerequisites:
  3925. list: []
  3926. configuration:
  3927. file:
  3928. name: ""
  3929. description: ""
  3930. options:
  3931. description: ""
  3932. folding:
  3933. title: ""
  3934. enabled: true
  3935. list: []
  3936. examples:
  3937. folding:
  3938. enabled: true
  3939. title: ""
  3940. list: []
  3941. troubleshooting:
  3942. problems:
  3943. list: []
  3944. alerts: []
  3945. metrics:
  3946. folding:
  3947. title: Metrics
  3948. enabled: false
  3949. description: ""
  3950. availability: []
  3951. scopes:
  3952. - name: global
  3953. description: ""
  3954. labels: []
  3955. metrics:
  3956. - name: nfs.net
  3957. description: NFS Client Network
  3958. unit: "operations/s"
  3959. chart_type: stacked
  3960. dimensions:
  3961. - name: udp
  3962. - name: tcp
  3963. - name: nfs.rpc
  3964. description: NFS Client Remote Procedure Calls Statistics
  3965. unit: "calls/s"
  3966. chart_type: line
  3967. dimensions:
  3968. - name: calls
  3969. - name: retransmits
  3970. - name: auth_refresh
  3971. - name: nfs.proc2
  3972. description: NFS v2 Client Remote Procedure Calls
  3973. unit: "calls/s"
  3974. chart_type: stacked
  3975. dimensions:
  3976. - name: a dimension per proc2 call
  3977. - name: nfs.proc3
  3978. description: NFS v3 Client Remote Procedure Calls
  3979. unit: "calls/s"
  3980. chart_type: stacked
  3981. dimensions:
  3982. - name: a dimension per proc3 call
  3983. - name: nfs.proc4
  3984. description: NFS v4 Client Remote Procedure Calls
  3985. unit: "calls/s"
  3986. chart_type: stacked
  3987. dimensions:
  3988. - name: a dimension per proc4 call
  3989. - meta:
  3990. plugin_name: proc.plugin
  3991. module_name: /proc/net/rpc/nfsd
  3992. monitored_instance:
  3993. name: NFS Server
  3994. link: ""
  3995. categories:
  3996. - data-collection.linux-systems.filesystem-metrics.nfs
  3997. icon_filename: "nfs.png"
  3998. related_resources:
  3999. integrations:
  4000. list: []
  4001. info_provided_to_referring_integrations:
  4002. description: ""
  4003. keywords:
  4004. - nfs server
  4005. - filesystem
  4006. most_popular: false
  4007. overview:
  4008. data_collection:
  4009. metrics_description: "This integration provides statistics from the Linux kernel's NFS Server."
  4010. method_description: ""
  4011. supported_platforms:
  4012. include: []
  4013. exclude: []
  4014. multi_instance: true
  4015. additional_permissions:
  4016. description: ""
  4017. default_behavior:
  4018. auto_detection:
  4019. description: ""
  4020. limits:
  4021. description: ""
  4022. performance_impact:
  4023. description: ""
  4024. setup:
  4025. prerequisites:
  4026. list: []
  4027. configuration:
  4028. file:
  4029. name: ""
  4030. description: ""
  4031. options:
  4032. description: ""
  4033. folding:
  4034. title: ""
  4035. enabled: true
  4036. list: []
  4037. examples:
  4038. folding:
  4039. enabled: true
  4040. title: ""
  4041. list: []
  4042. troubleshooting:
  4043. problems:
  4044. list: []
  4045. alerts: []
  4046. metrics:
  4047. folding:
  4048. title: Metrics
  4049. enabled: false
  4050. description: ""
  4051. availability: []
  4052. scopes:
  4053. - name: global
  4054. description: ""
  4055. labels: []
  4056. metrics:
  4057. - name: nfsd.readcache
  4058. description: NFS Server Read Cache
  4059. unit: "reads/s"
  4060. chart_type: stacked
  4061. dimensions:
  4062. - name: hits
  4063. - name: misses
  4064. - name: nocache
  4065. - name: nfsd.filehandles
  4066. description: NFS Server File Handles
  4067. unit: "handles/s"
  4068. chart_type: line
  4069. dimensions:
  4070. - name: stale
  4071. - name: nfsd.io
  4072. description: NFS Server I/O
  4073. unit: "kilobytes/s"
  4074. chart_type: area
  4075. dimensions:
  4076. - name: read
  4077. - name: write
  4078. - name: nfsd.threads
  4079. description: NFS Server Threads
  4080. unit: "threads"
  4081. chart_type: line
  4082. dimensions:
  4083. - name: threads
  4084. - name: nfsd.net
  4085. description: NFS Server Network Statistics
  4086. unit: "packets/s"
  4087. chart_type: line
  4088. dimensions:
  4089. - name: udp
  4090. - name: tcp
  4091. - name: nfsd.rpc
  4092. description: NFS Server Remote Procedure Calls Statistics
  4093. unit: "calls/s"
  4094. chart_type: line
  4095. dimensions:
  4096. - name: calls
  4097. - name: bad_format
  4098. - name: bad_auth
  4099. - name: nfsd.proc2
  4100. description: NFS v2 Server Remote Procedure Calls
  4101. unit: "calls/s"
  4102. chart_type: stacked
  4103. dimensions:
  4104. - name: a dimension per proc2 call
  4105. - name: nfsd.proc3
  4106. description: NFS v3 Server Remote Procedure Calls
  4107. unit: "calls/s"
  4108. chart_type: stacked
  4109. dimensions:
  4110. - name: a dimension per proc3 call
  4111. - name: nfsd.proc4
  4112. description: NFS v4 Server Remote Procedure Calls
  4113. unit: "calls/s"
  4114. chart_type: stacked
  4115. dimensions:
  4116. - name: a dimension per proc4 call
  4117. - name: nfsd.proc4ops
  4118. description: NFS v4 Server Operations
  4119. unit: "operations/s"
  4120. chart_type: stacked
  4121. dimensions:
  4122. - name: a dimension per proc4 operation
  4123. - meta:
  4124. plugin_name: proc.plugin
  4125. module_name: /proc/net/sctp/snmp
  4126. monitored_instance:
  4127. name: SCTP Statistics
  4128. link: ""
  4129. categories:
  4130. - data-collection.linux-systems.network-metrics
  4131. icon_filename: "network-wired.svg"
  4132. related_resources:
  4133. integrations:
  4134. list: []
  4135. info_provided_to_referring_integrations:
  4136. description: ""
  4137. keywords:
  4138. - sctp
  4139. - stream control transmission protocol
  4140. most_popular: false
  4141. overview:
  4142. data_collection:
  4143. metrics_description: "This integration provides statistics about the Stream Control Transmission Protocol (SCTP)."
  4144. method_description: ""
  4145. supported_platforms:
  4146. include: []
  4147. exclude: []
  4148. multi_instance: true
  4149. additional_permissions:
  4150. description: ""
  4151. default_behavior:
  4152. auto_detection:
  4153. description: ""
  4154. limits:
  4155. description: ""
  4156. performance_impact:
  4157. description: ""
  4158. setup:
  4159. prerequisites:
  4160. list: []
  4161. configuration:
  4162. file:
  4163. name: ""
  4164. description: ""
  4165. options:
  4166. description: ""
  4167. folding:
  4168. title: ""
  4169. enabled: true
  4170. list: []
  4171. examples:
  4172. folding:
  4173. enabled: true
  4174. title: ""
  4175. list: []
  4176. troubleshooting:
  4177. problems:
  4178. list: []
  4179. alerts: []
  4180. metrics:
  4181. folding:
  4182. title: Metrics
  4183. enabled: false
  4184. description: ""
  4185. availability: []
  4186. scopes:
  4187. - name: global
  4188. description: ""
  4189. labels: []
  4190. metrics:
  4191. - name: sctp.established
  4192. description: SCTP current total number of established associations
  4193. unit: "associations"
  4194. chart_type: line
  4195. dimensions:
  4196. - name: established
  4197. - name: sctp.transitions
  4198. description: SCTP Association Transitions
  4199. unit: "transitions/s"
  4200. chart_type: line
  4201. dimensions:
  4202. - name: active
  4203. - name: passive
  4204. - name: aborted
  4205. - name: shutdown
  4206. - name: sctp.packets
  4207. description: SCTP Packets
  4208. unit: "packets/s"
  4209. chart_type: line
  4210. dimensions:
  4211. - name: received
  4212. - name: sent
  4213. - name: sctp.packet_errors
  4214. description: SCTP Packet Errors
  4215. unit: "packets/s"
  4216. chart_type: line
  4217. dimensions:
  4218. - name: invalid
  4219. - name: checksum
  4220. - name: sctp.fragmentation
  4221. description: SCTP Fragmentation
  4222. unit: "packets/s"
  4223. chart_type: line
  4224. dimensions:
  4225. - name: reassembled
  4226. - name: fragmented
  4227. - meta:
  4228. plugin_name: proc.plugin
  4229. module_name: /proc/net/stat/nf_conntrack
  4230. monitored_instance:
  4231. name: Conntrack
  4232. link: ""
  4233. categories:
  4234. - data-collection.linux-systems.firewall-metrics
  4235. icon_filename: "firewall.svg"
  4236. related_resources:
  4237. integrations:
  4238. list: []
  4239. info_provided_to_referring_integrations:
  4240. description: ""
  4241. keywords:
  4242. - connection tracking mechanism
  4243. - netfilter
  4244. - conntrack
  4245. most_popular: false
  4246. overview:
  4247. data_collection:
  4248. metrics_description: "This integration monitors the connection tracking mechanism of Netfilter in the Linux Kernel."
  4249. method_description: ""
  4250. supported_platforms:
  4251. include: []
  4252. exclude: []
  4253. multi_instance: true
  4254. additional_permissions:
  4255. description: ""
  4256. default_behavior:
  4257. auto_detection:
  4258. description: ""
  4259. limits:
  4260. description: ""
  4261. performance_impact:
  4262. description: ""
  4263. setup:
  4264. prerequisites:
  4265. list: []
  4266. configuration:
  4267. file:
  4268. name: ""
  4269. description: ""
  4270. options:
  4271. description: ""
  4272. folding:
  4273. title: ""
  4274. enabled: true
  4275. list: []
  4276. examples:
  4277. folding:
  4278. enabled: true
  4279. title: ""
  4280. list: []
  4281. troubleshooting:
  4282. problems:
  4283. list: []
  4284. alerts:
  4285. - name: netfilter_conntrack_full
  4286. link: https://github.com/netdata/netdata/blob/master/health/health.d/netfilter.conf
  4287. metric: netfilter.conntrack_sockets
  4288. info: netfilter connection tracker table size utilization
  4289. os: "linux"
  4290. metrics:
  4291. folding:
  4292. title: Metrics
  4293. enabled: false
  4294. description: ""
  4295. availability: []
  4296. scopes:
  4297. - name: global
  4298. description: ""
  4299. labels: []
  4300. metrics:
  4301. - name: netfilter.conntrack_sockets
  4302. description: Connection Tracker Connections
  4303. unit: "active connections"
  4304. chart_type: line
  4305. dimensions:
  4306. - name: connections
  4307. - name: netfilter.conntrack_new
  4308. description: Connection Tracker New Connections
  4309. unit: "connections/s"
  4310. chart_type: line
  4311. dimensions:
  4312. - name: new
  4313. - name: ignore
  4314. - name: invalid
  4315. - name: netfilter.conntrack_changes
  4316. description: Connection Tracker Changes
  4317. unit: "changes/s"
  4318. chart_type: line
  4319. dimensions:
  4320. - name: inserted
  4321. - name: deleted
  4322. - name: delete_list
  4323. - name: netfilter.conntrack_expect
  4324. description: Connection Tracker Expectations
  4325. unit: "expectations/s"
  4326. chart_type: line
  4327. dimensions:
  4328. - name: created
  4329. - name: deleted
  4330. - name: new
  4331. - name: netfilter.conntrack_search
  4332. description: Connection Tracker Searches
  4333. unit: "searches/s"
  4334. chart_type: line
  4335. dimensions:
  4336. - name: searched
  4337. - name: restarted
  4338. - name: found
  4339. - name: netfilter.conntrack_errors
  4340. description: Connection Tracker Errors
  4341. unit: "events/s"
  4342. chart_type: line
  4343. dimensions:
  4344. - name: icmp_error
  4345. - name: error_failed
  4346. - name: drop
  4347. - name: early_drop
  4348. - meta:
  4349. plugin_name: proc.plugin
  4350. module_name: /proc/net/stat/synproxy
  4351. monitored_instance:
  4352. name: Synproxy
  4353. link: ""
  4354. categories:
  4355. - data-collection.linux-systems.firewall-metrics
  4356. icon_filename: "firewall.svg"
  4357. related_resources:
  4358. integrations:
  4359. list: []
  4360. info_provided_to_referring_integrations:
  4361. description: ""
  4362. keywords:
  4363. - synproxy
  4364. most_popular: false
  4365. overview:
  4366. data_collection:
  4367. metrics_description: "This integration provides statistics about the Synproxy netfilter module."
  4368. method_description: ""
  4369. supported_platforms:
  4370. include: []
  4371. exclude: []
  4372. multi_instance: true
  4373. additional_permissions:
  4374. description: ""
  4375. default_behavior:
  4376. auto_detection:
  4377. description: ""
  4378. limits:
  4379. description: ""
  4380. performance_impact:
  4381. description: ""
  4382. setup:
  4383. prerequisites:
  4384. list: []
  4385. configuration:
  4386. file:
  4387. name: ""
  4388. description: ""
  4389. options:
  4390. description: ""
  4391. folding:
  4392. title: ""
  4393. enabled: true
  4394. list: []
  4395. examples:
  4396. folding:
  4397. enabled: true
  4398. title: ""
  4399. list: []
  4400. troubleshooting:
  4401. problems:
  4402. list: []
  4403. alerts: []
  4404. metrics:
  4405. folding:
  4406. title: Metrics
  4407. enabled: false
  4408. description: ""
  4409. availability: []
  4410. scopes:
  4411. - name: global
  4412. description: ""
  4413. labels: []
  4414. metrics:
  4415. - name: netfilter.synproxy_syn_received
  4416. description: SYNPROXY SYN Packets received
  4417. unit: "packets/s"
  4418. chart_type: line
  4419. dimensions:
  4420. - name: received
  4421. - name: netfilter.synproxy_conn_reopened
  4422. description: SYNPROXY Connections Reopened
  4423. unit: "connections/s"
  4424. chart_type: line
  4425. dimensions:
  4426. - name: reopened
  4427. - name: netfilter.synproxy_cookies
  4428. description: SYNPROXY TCP Cookies
  4429. unit: "cookies/s"
  4430. chart_type: line
  4431. dimensions:
  4432. - name: valid
  4433. - name: invalid
  4434. - name: retransmits
  4435. - meta:
  4436. plugin_name: proc.plugin
  4437. module_name: /proc/spl/kstat/zfs
  4438. monitored_instance:
  4439. name: ZFS Pools
  4440. link: ""
  4441. categories:
  4442. - data-collection.linux-systems.filesystem-metrics.zfs
  4443. icon_filename: "filesystem.svg"
  4444. related_resources:
  4445. integrations:
  4446. list: []
  4447. info_provided_to_referring_integrations:
  4448. description: ""
  4449. keywords:
  4450. - zfs pools
  4451. - pools
  4452. - zfs
  4453. - filesystem
  4454. most_popular: false
  4455. overview:
  4456. data_collection:
  4457. metrics_description: "This integration provides metrics about the state of ZFS pools."
  4458. method_description: ""
  4459. supported_platforms:
  4460. include: []
  4461. exclude: []
  4462. multi_instance: true
  4463. additional_permissions:
  4464. description: ""
  4465. default_behavior:
  4466. auto_detection:
  4467. description: ""
  4468. limits:
  4469. description: ""
  4470. performance_impact:
  4471. description: ""
  4472. setup:
  4473. prerequisites:
  4474. list: []
  4475. configuration:
  4476. file:
  4477. name: ""
  4478. description: ""
  4479. options:
  4480. description: ""
  4481. folding:
  4482. title: ""
  4483. enabled: true
  4484. list: []
  4485. examples:
  4486. folding:
  4487. enabled: true
  4488. title: ""
  4489. list: []
  4490. troubleshooting:
  4491. problems:
  4492. list: []
  4493. alerts:
  4494. - name: zfs_pool_state_warn
  4495. link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
  4496. metric: zfspool.state
  4497. info: ZFS pool ${label:pool} state is degraded
  4498. - name: zfs_pool_state_crit
  4499. link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
  4500. metric: zfspool.state
  4501. info: ZFS pool ${label:pool} state is faulted or unavail
  4502. metrics:
  4503. folding:
  4504. title: Metrics
  4505. enabled: false
  4506. description: ""
  4507. availability: []
  4508. scopes:
  4509. - name: zfs pool
  4510. description: ""
  4511. labels:
  4512. - name: pool
  4513. description: TBD
  4514. metrics:
  4515. - name: zfspool.state
  4516. description: ZFS pool state
  4517. unit: "boolean"
  4518. chart_type: line
  4519. dimensions:
  4520. - name: online
  4521. - name: degraded
  4522. - name: faulted
  4523. - name: offline
  4524. - name: removed
  4525. - name: unavail
  4526. - name: suspended
  4527. - meta:
  4528. plugin_name: proc.plugin
  4529. module_name: /proc/spl/kstat/zfs/arcstats
  4530. monitored_instance:
  4531. name: ZFS Adaptive Replacement Cache
  4532. link: ""
  4533. categories:
  4534. - data-collection.linux-systems.filesystem-metrics.zfs
  4535. icon_filename: "filesystem.svg"
  4536. related_resources:
  4537. integrations:
  4538. list: []
  4539. info_provided_to_referring_integrations:
  4540. description: ""
  4541. keywords:
  4542. - zfs arc
  4543. - arc
  4544. - zfs
  4545. - filesystem
  4546. most_popular: false
  4547. overview:
  4548. data_collection:
  4549. metrics_description: "This integration monitors ZFS Adaptive Replacement Cache (ARC) statistics."
  4550. method_description: ""
  4551. supported_platforms:
  4552. include: []
  4553. exclude: []
  4554. multi_instance: true
  4555. additional_permissions:
  4556. description: ""
  4557. default_behavior:
  4558. auto_detection:
  4559. description: ""
  4560. limits:
  4561. description: ""
  4562. performance_impact:
  4563. description: ""
  4564. setup:
  4565. prerequisites:
  4566. list: []
  4567. configuration:
  4568. file:
  4569. name: ""
  4570. description: ""
  4571. options:
  4572. description: ""
  4573. folding:
  4574. title: ""
  4575. enabled: true
  4576. list: []
  4577. examples:
  4578. folding:
  4579. enabled: true
  4580. title: ""
  4581. list: []
  4582. troubleshooting:
  4583. problems:
  4584. list: []
  4585. alerts:
  4586. - name: zfs_memory_throttle
  4587. link: https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf
  4588. metric: zfs.memory_ops
  4589. info: number of times ZFS had to limit the ARC growth in the last 10 minutes
  4590. metrics:
  4591. folding:
  4592. title: Metrics
  4593. enabled: false
  4594. description: ""
  4595. availability: []
  4596. scopes:
  4597. - name: global
  4598. description: ""
  4599. labels: []
  4600. metrics:
  4601. - name: zfs.arc_size
  4602. description: ZFS ARC Size
  4603. unit: "MiB"
  4604. chart_type: area
  4605. dimensions:
  4606. - name: arcsz
  4607. - name: target
  4608. - name: min
  4609. - name: max
  4610. - name: zfs.l2_size
  4611. description: ZFS L2 ARC Size
  4612. unit: "MiB"
  4613. chart_type: area
  4614. dimensions:
  4615. - name: actual
  4616. - name: size
  4617. - name: zfs.reads
  4618. description: ZFS Reads
  4619. unit: "reads/s"
  4620. chart_type: area
  4621. dimensions:
  4622. - name: arc
  4623. - name: demand
  4624. - name: prefetch
  4625. - name: metadata
  4626. - name: l2
  4627. - name: zfs.bytes
  4628. description: ZFS ARC L2 Read/Write Rate
  4629. unit: "KiB/s"
  4630. chart_type: area
  4631. dimensions:
  4632. - name: read
  4633. - name: write
  4634. - name: zfs.hits
  4635. description: ZFS ARC Hits
  4636. unit: "percentage"
  4637. chart_type: stacked
  4638. dimensions:
  4639. - name: hits
  4640. - name: misses
  4641. - name: zfs.hits_rate
  4642. description: ZFS ARC Hits Rate
  4643. unit: "events/s"
  4644. chart_type: stacked
  4645. dimensions:
  4646. - name: hits
  4647. - name: misses
  4648. - name: zfs.dhits
  4649. description: ZFS Demand Hits
  4650. unit: "percentage"
  4651. chart_type: stacked
  4652. dimensions:
  4653. - name: hits
  4654. - name: misses
  4655. - name: zfs.dhits_rate
  4656. description: ZFS Demand Hits Rate
  4657. unit: "events/s"
  4658. chart_type: stacked
  4659. dimensions:
  4660. - name: hits
  4661. - name: misses
  4662. - name: zfs.phits
  4663. description: ZFS Prefetch Hits
  4664. unit: "percentage"
  4665. chart_type: stacked
  4666. dimensions:
  4667. - name: hits
  4668. - name: misses
  4669. - name: zfs.phits_rate
  4670. description: ZFS Prefetch Hits Rate
  4671. unit: "events/s"
  4672. chart_type: stacked
  4673. dimensions:
  4674. - name: hits
  4675. - name: misses
  4676. - name: zfs.mhits
  4677. description: ZFS Metadata Hits
  4678. unit: "percentage"
  4679. chart_type: stacked
  4680. dimensions:
  4681. - name: hits
  4682. - name: misses
  4683. - name: zfs.mhits_rate
  4684. description: ZFS Metadata Hits Rate
  4685. unit: "events/s"
  4686. chart_type: stacked
  4687. dimensions:
  4688. - name: hits
  4689. - name: misses
  4690. - name: zfs.l2hits
  4691. description: ZFS L2 Hits
  4692. unit: "percentage"
  4693. chart_type: stacked
  4694. dimensions:
  4695. - name: hits
  4696. - name: misses
  4697. - name: zfs.l2hits_rate
  4698. description: ZFS L2 Hits Rate
  4699. unit: "events/s"
  4700. chart_type: stacked
  4701. dimensions:
  4702. - name: hits
  4703. - name: misses
  4704. - name: zfs.list_hits
  4705. description: ZFS List Hits
  4706. unit: "hits/s"
  4707. chart_type: area
  4708. dimensions:
  4709. - name: mfu
  4710. - name: mfu_ghost
  4711. - name: mru
  4712. - name: mru_ghost
  4713. - name: zfs.arc_size_breakdown
  4714. description: ZFS ARC Size Breakdown
  4715. unit: "percentage"
  4716. chart_type: stacked
  4717. dimensions:
  4718. - name: recent
  4719. - name: frequent
  4720. - name: zfs.memory_ops
  4721. description: ZFS Memory Operations
  4722. unit: "operations/s"
  4723. chart_type: line
  4724. dimensions:
  4725. - name: direct
  4726. - name: throttled
  4727. - name: indirect
  4728. - name: zfs.important_ops
  4729. description: ZFS Important Operations
  4730. unit: "operations/s"
  4731. chart_type: line
  4732. dimensions:
  4733. - name: evict_skip
  4734. - name: deleted
  4735. - name: mutex_miss
  4736. - name: hash_collisions
  4737. - name: zfs.actual_hits
  4738. description: ZFS Actual Cache Hits
  4739. unit: "percentage"
  4740. chart_type: stacked
  4741. dimensions:
  4742. - name: hits
  4743. - name: misses
  4744. - name: zfs.actual_hits_rate
  4745. description: ZFS Actual Cache Hits Rate
  4746. unit: "events/s"
  4747. chart_type: stacked
  4748. dimensions:
  4749. - name: hits
  4750. - name: misses
  4751. - name: zfs.demand_data_hits
  4752. description: ZFS Data Demand Efficiency
  4753. unit: "percentage"
  4754. chart_type: stacked
  4755. dimensions:
  4756. - name: hits
  4757. - name: misses
  4758. - name: zfs.demand_data_hits_rate
  4759. description: ZFS Data Demand Efficiency Rate
  4760. unit: "events/s"
  4761. chart_type: stacked
  4762. dimensions:
  4763. - name: hits
  4764. - name: misses
  4765. - name: zfs.prefetch_data_hits
  4766. description: ZFS Data Prefetch Efficiency
  4767. unit: "percentage"
  4768. chart_type: stacked
  4769. dimensions:
  4770. - name: hits
  4771. - name: misses
  4772. - name: zfs.prefetch_data_hits_rate
  4773. description: ZFS Data Prefetch Efficiency Rate
  4774. unit: "events/s"
  4775. chart_type: stacked
  4776. dimensions:
  4777. - name: hits
  4778. - name: misses
  4779. - name: zfs.hash_elements
  4780. description: ZFS ARC Hash Elements
  4781. unit: "elements"
  4782. chart_type: line
  4783. dimensions:
  4784. - name: current
  4785. - name: max
  4786. - name: zfs.hash_chains
  4787. description: ZFS ARC Hash Chains
  4788. unit: "chains"
  4789. chart_type: line
  4790. dimensions:
  4791. - name: current
  4792. - name: max
  4793. - meta:
  4794. plugin_name: proc.plugin
  4795. module_name: /sys/fs/btrfs
  4796. monitored_instance:
  4797. name: BTRFS
  4798. link: ""
  4799. categories:
  4800. - data-collection.linux-systems.filesystem-metrics.btrfs
  4801. icon_filename: "filesystem.svg"
  4802. related_resources:
  4803. integrations:
  4804. list: []
  4805. info_provided_to_referring_integrations:
  4806. description: ""
  4807. keywords:
  4808. - btrfs
  4809. - filesystem
  4810. most_popular: false
  4811. overview:
  4812. data_collection:
  4813. metrics_description: "This integration provides usage and error statistics from the BTRFS filesystem."
  4814. method_description: ""
  4815. supported_platforms:
  4816. include: []
  4817. exclude: []
  4818. multi_instance: true
  4819. additional_permissions:
  4820. description: ""
  4821. default_behavior:
  4822. auto_detection:
  4823. description: ""
  4824. limits:
  4825. description: ""
  4826. performance_impact:
  4827. description: ""
  4828. setup:
  4829. prerequisites:
  4830. list: []
  4831. configuration:
  4832. file:
  4833. name: ""
  4834. description: ""
  4835. options:
  4836. description: ""
  4837. folding:
  4838. title: ""
  4839. enabled: true
  4840. list: []
  4841. examples:
  4842. folding:
  4843. enabled: true
  4844. title: ""
  4845. list: []
  4846. troubleshooting:
  4847. problems:
  4848. list: []
  4849. alerts:
  4850. - name: btrfs_allocated
  4851. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4852. metric: btrfs.disk
  4853. info: percentage of allocated BTRFS physical disk space
  4854. os: "*"
  4855. - name: btrfs_data
  4856. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4857. metric: btrfs.data
  4858. info: utilization of BTRFS data space
  4859. os: "*"
  4860. - name: btrfs_metadata
  4861. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4862. metric: btrfs.metadata
  4863. info: utilization of BTRFS metadata space
  4864. os: "*"
  4865. - name: btrfs_system
  4866. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4867. metric: btrfs.system
  4868. info: utilization of BTRFS system space
  4869. os: "*"
  4870. - name: btrfs_device_read_errors
  4871. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4872. metric: btrfs.device_errors
  4873. info: number of encountered BTRFS read errors
  4874. os: "*"
  4875. - name: btrfs_device_write_errors
  4876. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4877. metric: btrfs.device_errors
  4878. info: number of encountered BTRFS write errors
  4879. os: "*"
  4880. - name: btrfs_device_flush_errors
  4881. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4882. metric: btrfs.device_errors
  4883. info: number of encountered BTRFS flush errors
  4884. os: "*"
  4885. - name: btrfs_device_corruption_errors
  4886. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4887. metric: btrfs.device_errors
  4888. info: number of encountered BTRFS corruption errors
  4889. os: "*"
  4890. - name: btrfs_device_generation_errors
  4891. link: https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf
  4892. metric: btrfs.device_errors
  4893. info: number of encountered BTRFS generation errors
  4894. os: "*"
  4895. metrics:
  4896. folding:
  4897. title: Metrics
  4898. enabled: false
  4899. description: ""
  4900. availability: []
  4901. scopes:
  4902. - name: btrfs filesystem
  4903. description: ""
  4904. labels:
  4905. - name: filesystem_uuid
  4906. description: TBD
  4907. - name: filesystem_label
  4908. description: TBD
  4909. metrics:
  4910. - name: btrfs.disk
  4911. description: BTRFS Physical Disk Allocation
  4912. unit: "MiB"
  4913. chart_type: stacked
  4914. dimensions:
  4915. - name: unallocated
  4916. - name: data_free
  4917. - name: data_used
  4918. - name: meta_free
  4919. - name: meta_used
  4920. - name: sys_free
  4921. - name: sys_used
  4922. - name: btrfs.data
  4923. description: BTRFS Data Allocation
  4924. unit: "MiB"
  4925. chart_type: stacked
  4926. dimensions:
  4927. - name: free
  4928. - name: used
  4929. - name: btrfs.metadata
  4930. description: BTRFS Metadata Allocation
  4931. unit: "MiB"
  4932. chart_type: stacked
  4933. dimensions:
  4934. - name: free
  4935. - name: used
  4936. - name: reserved
  4937. - name: btrfs.system
  4938. description: BTRFS System Allocation
  4939. unit: "MiB"
  4940. chart_type: stacked
  4941. dimensions:
  4942. - name: free
  4943. - name: used
  4944. - name: btrfs.commits
  4945. description: BTRFS Commits
  4946. unit: "commits"
  4947. chart_type: line
  4948. dimensions:
  4949. - name: commits
  4950. - name: btrfs.commits_perc_time
  4951. description: BTRFS Commits Time Share
  4952. unit: "percentage"
  4953. chart_type: line
  4954. dimensions:
  4955. - name: commits
  4956. - name: btrfs.commit_timings
  4957. description: BTRFS Commit Timings
  4958. unit: "ms"
  4959. chart_type: line
  4960. dimensions:
  4961. - name: last
  4962. - name: max
  4963. - name: btrfs device
  4964. description: ""
  4965. labels:
  4966. - name: device_id
  4967. description: TBD
  4968. - name: filesystem_uuid
  4969. description: TBD
  4970. - name: filesystem_label
  4971. description: TBD
  4972. metrics:
  4973. - name: btrfs.device_errors
  4974. description: BTRFS Device Errors
  4975. unit: "errors"
  4976. chart_type: line
  4977. dimensions:
  4978. - name: write_errs
  4979. - name: read_errs
  4980. - name: flush_errs
  4981. - name: corruption_errs
  4982. - name: generation_errs
  4983. - meta:
  4984. plugin_name: proc.plugin
  4985. module_name: /sys/class/power_supply
  4986. monitored_instance:
  4987. name: Power Supply
  4988. link: ""
  4989. categories:
  4990. - data-collection.linux-systems.power-supply-metrics
  4991. icon_filename: "powersupply.svg"
  4992. related_resources:
  4993. integrations:
  4994. list: []
  4995. info_provided_to_referring_integrations:
  4996. description: ""
  4997. keywords:
  4998. - psu
  4999. - power supply
  5000. most_popular: false
  5001. overview:
  5002. data_collection:
  5003. metrics_description: "This integration monitors power supply metrics, such as battery status, AC power status and more."
  5004. method_description: ""
  5005. supported_platforms:
  5006. include: []
  5007. exclude: []
  5008. multi_instance: true
  5009. additional_permissions:
  5010. description: ""
  5011. default_behavior:
  5012. auto_detection:
  5013. description: ""
  5014. limits:
  5015. description: ""
  5016. performance_impact:
  5017. description: ""
  5018. setup:
  5019. prerequisites:
  5020. list: []
  5021. configuration:
  5022. file:
  5023. name: ""
  5024. description: ""
  5025. options:
  5026. description: ""
  5027. folding:
  5028. title: ""
  5029. enabled: true
  5030. list: []
  5031. examples:
  5032. folding:
  5033. enabled: true
  5034. title: ""
  5035. list: []
  5036. troubleshooting:
  5037. problems:
  5038. list: []
  5039. alerts:
  5040. - name: linux_power_supply_capacity
  5041. link: https://github.com/netdata/netdata/blob/master/health/health.d/linux_power_supply.conf
  5042. metric: powersupply.capacity
  5043. info: percentage of remaining power supply capacity
  5044. metrics:
  5045. folding:
  5046. title: Metrics
  5047. enabled: false
  5048. description: ""
  5049. availability: []
  5050. scopes:
  5051. - name: power device
  5052. description: ""
  5053. labels:
  5054. - name: device
  5055. description: TBD
  5056. metrics:
  5057. - name: powersupply.capacity
  5058. description: Battery capacity
  5059. unit: "percentage"
  5060. chart_type: line
  5061. dimensions:
  5062. - name: capacity
  5063. - name: powersupply.charge
  5064. description: Battery charge
  5065. unit: "Ah"
  5066. chart_type: line
  5067. dimensions:
  5068. - name: empty_design
  5069. - name: empty
  5070. - name: now
  5071. - name: full
  5072. - name: full_design
  5073. - name: powersupply.energy
  5074. description: Battery energy
  5075. unit: "Wh"
  5076. chart_type: line
  5077. dimensions:
  5078. - name: empty_design
  5079. - name: empty
  5080. - name: now
  5081. - name: full
  5082. - name: full_design
  5083. - name: powersupply.voltage
  5084. description: Power supply voltage
  5085. unit: "V"
  5086. chart_type: line
  5087. dimensions:
  5088. - name: min_design
  5089. - name: min
  5090. - name: now
  5091. - name: max
  5092. - name: max_design
  5093. - meta:
  5094. plugin_name: proc.plugin
  5095. module_name: /sys/class/drm
  5096. monitored_instance:
  5097. name: AMD GPU
  5098. link: "https://www.amd.com"
  5099. categories:
  5100. - data-collection.hardware-devices-and-sensors
  5101. icon_filename: amd.svg
  5102. related_resources:
  5103. integrations:
  5104. list: []
  5105. info_provided_to_referring_integrations:
  5106. description: ""
  5107. keywords:
  5108. - amd
  5109. - gpu
  5110. - hardware
  5111. most_popular: false
  5112. overview:
  5113. data_collection:
  5114. metrics_description: "This integration monitors AMD GPU metrics, such as utilization, clock frequency and memory usage."
  5115. method_description: "It reads `/sys/class/drm` to collect metrics for every AMD GPU card instance it encounters."
  5116. supported_platforms:
  5117. include:
  5118. - Linux
  5119. exclude: []
  5120. multi_instance: true
  5121. additional_permissions:
  5122. description: ""
  5123. default_behavior:
  5124. auto_detection:
  5125. description: ""
  5126. limits:
  5127. description: ""
  5128. performance_impact:
  5129. description: ""
  5130. setup:
  5131. prerequisites:
  5132. list: []
  5133. configuration:
  5134. file:
  5135. name: ""
  5136. description: ""
  5137. options:
  5138. description: ""
  5139. folding:
  5140. title: ""
  5141. enabled: true
  5142. list: []
  5143. examples:
  5144. folding:
  5145. enabled: true
  5146. title: ""
  5147. list: []
  5148. troubleshooting:
  5149. problems:
  5150. list: []
  5151. alerts: []
  5152. metrics:
  5153. folding:
  5154. title: Metrics
  5155. enabled: false
  5156. description: ""
  5157. availability: []
  5158. scopes:
  5159. - name: gpu
  5160. description: "These metrics refer to the GPU."
  5161. labels:
  5162. - name: product_name
  5163. description: GPU product name (e.g. AMD RX 6600)
  5164. metrics:
  5165. - name: amdgpu.gpu_utilization
  5166. description: GPU utilization
  5167. unit: "percentage"
  5168. chart_type: line
  5169. dimensions:
  5170. - name: utilization
  5171. - name: amdgpu.gpu_mem_utilization
  5172. description: GPU memory utilization
  5173. unit: "percentage"
  5174. chart_type: line
  5175. dimensions:
  5176. - name: utilization
  5177. - name: amdgpu.gpu_clk_frequency
  5178. description: GPU clock frequency
  5179. unit: "MHz"
  5180. chart_type: line
  5181. dimensions:
  5182. - name: frequency
  5183. - name: amdgpu.gpu_mem_clk_frequency
  5184. description: GPU memory clock frequency
  5185. unit: "MHz"
  5186. chart_type: line
  5187. dimensions:
  5188. - name: frequency
  5189. - name: amdgpu.gpu_mem_vram_usage_perc
  5190. description: VRAM memory usage percentage
  5191. unit: "percentage"
  5192. chart_type: line
  5193. dimensions:
  5194. - name: usage
  5195. - name: amdgpu.gpu_mem_vram_usage
  5196. description: VRAM memory usage
  5197. unit: "bytes"
  5198. chart_type: area
  5199. dimensions:
  5200. - name: free
  5201. - name: used
  5202. - name: amdgpu.gpu_mem_vis_vram_usage_perc
  5203. description: visible VRAM memory usage percentage
  5204. unit: "percentage"
  5205. chart_type: line
  5206. dimensions:
  5207. - name: usage
  5208. - name: amdgpu.gpu_mem_vis_vram_usage
  5209. description: visible VRAM memory usage
  5210. unit: "bytes"
  5211. chart_type: area
  5212. dimensions:
  5213. - name: free
  5214. - name: used
  5215. - name: amdgpu.gpu_mem_gtt_usage_perc
  5216. description: GTT memory usage percentage
  5217. unit: "percentage"
  5218. chart_type: line
  5219. dimensions:
  5220. - name: usage
  5221. - name: amdgpu.gpu_mem_gtt_usage
  5222. description: GTT memory usage
  5223. unit: "bytes"
  5224. chart_type: area
  5225. dimensions:
  5226. - name: free
  5227. - name: used