proc_pressure.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "plugin_proc.h"
  // Name of this collector module; passed as the module argument when its charts are created.
  #define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure"

  // Configuration section holding the options read by this module.
  #define CONFIG_SECTION_PLUGIN_PROC_PRESSURE \
      "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME

  // Lower bound (seconds) for the collection interval: linux calculates the pressure
  // values every 2 seconds, see PSI_FREQ in kernel/sched/psi.c.
  #define MIN_PRESSURE_UPDATE_EVERY 2

  // Collection interval currently in effect. do_proc_pressure() stores the clamped
  // interval here and newly created charts use it as their update frequency.
  static int pressure_update_every = 0;
  8. static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
  9. {
  10. .some = {
  11. .available = true,
  12. .share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
  13. .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}
  14. },
  15. .full = {
  16. // Disable CPU full pressure.
  17. // See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
  18. .available = false,
  19. .share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
  20. .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}
  21. },
  22. },
  23. {
  24. .some = {
  25. .available = true,
  26. .share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
  27. .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}
  28. },
  29. .full = {
  30. .available = true,
  31. .share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
  32. .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}
  33. },
  34. },
  35. {
  36. .some = {
  37. .available = true,
  38. .share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
  39. .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}
  40. },
  41. .full = {
  42. .available = true,
  43. .share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
  44. .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}
  45. },
  46. },
  47. {
  48. .some = {
  49. // this is not available
  50. .available = false,
  51. .share_time = {.id = "irq_some_pressure", .title = "IRQ some pressure"},
  52. .total_time = {.id = "irq_some_pressure_stall_time", .title = "IRQ some pressure stall time"}
  53. },
  54. .full = {
  55. .available = true,
  56. .share_time = {.id = "irq_full_pressure", .title = "IRQ full pressure"},
  57. .total_time = {.id = "irq_full_pressure_stall_time", .title = "IRQ full pressure stall time"}
  58. },
  59. },
  60. };
  61. static struct resource_info {
  62. procfile *pf;
  63. const char *name; // metric file name
  64. const char *family; // webui section name
  65. int section_priority;
  66. } resource_info[PRESSURE_NUM_RESOURCES] = {
  67. { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
  68. { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
  69. { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
  70. { .name = "irq", .family = "interrupts", .section_priority = NETDATA_CHART_PRIO_SYSTEM_INTERRUPTS },
  71. };
  72. void update_pressure_charts(struct pressure_charts *pcs) {
  73. if (pcs->share_time.st) {
  74. rrddim_set_by_pointer(
  75. pcs->share_time.st, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100));
  76. rrddim_set_by_pointer(
  77. pcs->share_time.st, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100));
  78. rrddim_set_by_pointer(
  79. pcs->share_time.st, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100));
  80. rrdset_done(pcs->share_time.st);
  81. }
  82. if (pcs->total_time.st) {
  83. rrddim_set_by_pointer(
  84. pcs->total_time.st, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total));
  85. rrdset_done(pcs->total_time.st);
  86. }
  87. }
  88. static void proc_pressure_do_resource(procfile *ff, int res_idx, size_t line, bool some) {
  89. struct pressure_charts *pcs;
  90. struct resource_info ri;
  91. pcs = some ? &resources[res_idx].some : &resources[res_idx].full;
  92. ri = resource_info[res_idx];
  93. if (unlikely(!pcs->share_time.st)) {
  94. pcs->share_time.st = rrdset_create_localhost(
  95. "system",
  96. pcs->share_time.id,
  97. NULL,
  98. ri.family,
  99. NULL,
  100. pcs->share_time.title,
  101. "percentage",
  102. PLUGIN_PROC_NAME,
  103. PLUGIN_PROC_MODULE_PRESSURE_NAME,
  104. ri.section_priority + (some ? 40 : 50),
  105. pressure_update_every,
  106. RRDSET_TYPE_LINE);
  107. pcs->share_time.rd10 =
  108. rrddim_add(pcs->share_time.st, some ? "some 10" : "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  109. pcs->share_time.rd60 =
  110. rrddim_add(pcs->share_time.st, some ? "some 60" : "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  111. pcs->share_time.rd300 =
  112. rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  113. }
  114. pcs->share_time.value10 = strtod(procfile_lineword(ff, line, 2), NULL);
  115. pcs->share_time.value60 = strtod(procfile_lineword(ff, line, 4), NULL);
  116. pcs->share_time.value300 = strtod(procfile_lineword(ff, line, 6), NULL);
  117. if (unlikely(!pcs->total_time.st)) {
  118. pcs->total_time.st = rrdset_create_localhost(
  119. "system",
  120. pcs->total_time.id,
  121. NULL,
  122. ri.family,
  123. NULL,
  124. pcs->total_time.title,
  125. "ms",
  126. PLUGIN_PROC_NAME,
  127. PLUGIN_PROC_MODULE_PRESSURE_NAME,
  128. ri.section_priority + (some ? 45 : 55),
  129. pressure_update_every,
  130. RRDSET_TYPE_LINE);
  131. pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  132. }
  133. pcs->total_time.value_total = str2ull(procfile_lineword(ff, line, 8), NULL) / 1000;
  134. }
  135. static void proc_pressure_do_resource_some(procfile *ff, int res_idx, size_t line) {
  136. proc_pressure_do_resource(ff, res_idx, line, true);
  137. }
  138. static void proc_pressure_do_resource_full(procfile *ff, int res_idx, size_t line) {
  139. proc_pressure_do_resource(ff, res_idx, line, false);
  140. }
  141. int do_proc_pressure(int update_every, usec_t dt) {
  142. int ok_count = 0;
  143. int i;
  144. static usec_t next_pressure_dt = 0;
  145. static char *base_path = NULL;
  146. update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every;
  147. pressure_update_every = update_every;
  148. if (next_pressure_dt <= dt) {
  149. next_pressure_dt = update_every * USEC_PER_SEC;
  150. } else {
  151. next_pressure_dt -= dt;
  152. return 0;
  153. }
  154. if (unlikely(!base_path)) {
  155. base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure");
  156. }
  157. for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) {
  158. procfile *ff = resource_info[i].pf;
  159. int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled;
  160. if (!resources[i].some.available && !resources[i].full.available)
  161. continue;
  162. if (unlikely(!ff)) {
  163. char filename[FILENAME_MAX + 1];
  164. char config_key[CONFIG_MAX_NAME + 1];
  165. snprintfz(filename
  166. , FILENAME_MAX
  167. , "%s%s/%s"
  168. , netdata_configured_host_prefix
  169. , base_path
  170. , resource_info[i].name);
  171. do_some = resources[i].some.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
  172. do_full = resources[i].full.available ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
  173. snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
  174. do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_some);
  175. resources[i].some.enabled = do_some;
  176. snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
  177. do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, do_full);
  178. resources[i].full.enabled = do_full;
  179. if (!do_full && !do_some) {
  180. resources[i].some.available = false;
  181. resources[i].full.available = false;
  182. continue;
  183. }
  184. ff = procfile_open(filename, " =", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
  185. if (unlikely(!ff)) {
  186. // PSI IRQ was added recently (https://github.com/torvalds/linux/commit/52b1364ba0b105122d6de0e719b36db705011ac1)
  187. if (strcmp(resource_info[i].name, "irq") != 0)
  188. collector_error("Cannot read pressure information from %s.", filename);
  189. resources[i].some.available = false;
  190. resources[i].full.available = false;
  191. continue;
  192. }
  193. }
  194. ff = procfile_readall(ff);
  195. resource_info[i].pf = ff;
  196. if (unlikely(!ff))
  197. continue;
  198. size_t lines = procfile_lines(ff);
  199. if (unlikely(lines < 1)) {
  200. collector_error("%s has no lines.", procfile_filename(ff));
  201. continue;
  202. }
  203. for(size_t l = 0; l < lines ;l++) {
  204. const char *key = procfile_lineword(ff, l, 0);
  205. if(strcmp(key, "some") == 0) {
  206. if(do_some) {
  207. proc_pressure_do_resource_some(ff, i, l);
  208. update_pressure_charts(&resources[i].some);
  209. ok_count++;
  210. }
  211. }
  212. else if(strcmp(key, "full") == 0) {
  213. if(do_full) {
  214. proc_pressure_do_resource_full(ff, i, l);
  215. update_pressure_charts(&resources[i].full);
  216. ok_count++;
  217. }
  218. }
  219. }
  220. }
  221. if(!ok_count)
  222. return 1;
  223. return 0;
  224. }