// proc_pressure.c 8.2 KB
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "plugin_proc.h"
  // Name of this module and of its configuration section.
  #define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure"
  #define CONFIG_SECTION_PLUGIN_PROC_PRESSURE "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME

  // Linux recalculates the pressure averages every 2 seconds (see PSI_FREQ in
  // kernel/sched/psi.c); this is the lower bound applied to the collection interval.
  #define MIN_PRESSURE_UPDATE_EVERY 2

  // Interval (in seconds) handed to charts when they are created.
  // Starts at zero and is overwritten on every do_proc_pressure() call.
  static int pressure_update_every;
  8. static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
  9. {
  10. .some =
  11. {.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
  12. .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}},
  13. .full =
  14. {.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
  15. .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}},
  16. },
  17. {
  18. .some =
  19. {.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
  20. .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}},
  21. .full =
  22. {.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
  23. .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}},
  24. },
  25. {
  26. .some =
  27. {.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
  28. .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}},
  29. .full =
  30. {.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
  31. .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}},
  32. },
  33. };
  34. static struct resource_info {
  35. procfile *pf;
  36. const char *name; // metric file name
  37. const char *family; // webui section name
  38. int section_priority;
  39. } resource_info[PRESSURE_NUM_RESOURCES] = {
  40. { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
  41. { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
  42. { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
  43. };
  44. void update_pressure_charts(struct pressure_charts *pcs) {
  45. if (pcs->share_time.st) {
  46. rrddim_set_by_pointer(
  47. pcs->share_time.st, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100));
  48. rrddim_set_by_pointer(
  49. pcs->share_time.st, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100));
  50. rrddim_set_by_pointer(
  51. pcs->share_time.st, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100));
  52. rrdset_done(pcs->share_time.st);
  53. }
  54. if (pcs->total_time.st) {
  55. rrddim_set_by_pointer(
  56. pcs->total_time.st, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total));
  57. rrdset_done(pcs->total_time.st);
  58. }
  59. }
  60. static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
  61. struct pressure_charts *pcs;
  62. struct resource_info ri;
  63. pcs = some ? &resources[res_idx].some : &resources[res_idx].full;
  64. ri = resource_info[res_idx];
  65. if (unlikely(!pcs->share_time.st)) {
  66. pcs->share_time.st = rrdset_create_localhost(
  67. "system",
  68. pcs->share_time.id,
  69. NULL,
  70. ri.family,
  71. NULL,
  72. pcs->share_time.title,
  73. "percentage",
  74. PLUGIN_PROC_NAME,
  75. PLUGIN_PROC_MODULE_PRESSURE_NAME,
  76. ri.section_priority + (some ? 40 : 50),
  77. pressure_update_every,
  78. RRDSET_TYPE_LINE);
  79. pcs->share_time.rd10 =
  80. rrddim_add(pcs->share_time.st, some ? "some 10" : "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  81. pcs->share_time.rd60 =
  82. rrddim_add(pcs->share_time.st, some ? "some 60" : "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  83. pcs->share_time.rd300 =
  84. rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  85. }
  86. pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL);
  87. pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL);
  88. pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL);
  89. if (unlikely(!pcs->total_time.st)) {
  90. pcs->total_time.st = rrdset_create_localhost(
  91. "system",
  92. pcs->total_time.id,
  93. NULL,
  94. ri.family,
  95. NULL,
  96. pcs->total_time.title,
  97. "ms",
  98. PLUGIN_PROC_NAME,
  99. PLUGIN_PROC_MODULE_PRESSURE_NAME,
  100. ri.section_priority + (some ? 45 : 55),
  101. pressure_update_every,
  102. RRDSET_TYPE_LINE);
  103. pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  104. }
  105. pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 0 : 1, 8), NULL) / 1000;
  106. }
  107. static void proc_pressure_do_resource_some(procfile *ff, int res_idx) {
  108. proc_pressure_do_resource(ff, res_idx, 1);
  109. }
  110. static void proc_pressure_do_resource_full(procfile *ff, int res_idx) {
  111. proc_pressure_do_resource(ff, res_idx, 0);
  112. }
  113. int do_proc_pressure(int update_every, usec_t dt) {
  114. int fail_count = 0;
  115. int i;
  116. static usec_t next_pressure_dt = 0;
  117. static char *base_path = NULL;
  118. update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every;
  119. pressure_update_every = update_every;
  120. if (next_pressure_dt <= dt) {
  121. next_pressure_dt = update_every * USEC_PER_SEC;
  122. } else {
  123. next_pressure_dt -= dt;
  124. return 0;
  125. }
  126. if (unlikely(!base_path)) {
  127. base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure");
  128. }
  129. for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) {
  130. procfile *ff = resource_info[i].pf;
  131. int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled;
  132. if (unlikely(!ff)) {
  133. char filename[FILENAME_MAX + 1];
  134. char config_key[CONFIG_MAX_NAME + 1];
  135. snprintfz(filename
  136. , FILENAME_MAX
  137. , "%s%s/%s"
  138. , netdata_configured_host_prefix
  139. , base_path
  140. , resource_info[i].name);
  141. snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
  142. do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
  143. resources[i].some.enabled = do_some;
  144. // Disable CPU full pressure.
  145. // See https://github.com/torvalds/linux/commit/890d550d7dbac7a31ecaa78732aa22be282bb6b8
  146. if (i == 0) {
  147. do_full = CONFIG_BOOLEAN_NO;
  148. resources[i].full.enabled = do_full;
  149. } else {
  150. snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
  151. do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
  152. resources[i].full.enabled = do_full;
  153. }
  154. ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
  155. if (unlikely(!ff)) {
  156. collector_error("Cannot read pressure information from %s.", filename);
  157. fail_count++;
  158. continue;
  159. }
  160. }
  161. ff = procfile_readall(ff);
  162. resource_info[i].pf = ff;
  163. if (unlikely(!ff)) {
  164. fail_count++;
  165. continue;
  166. }
  167. size_t lines = procfile_lines(ff);
  168. if (unlikely(lines < 1)) {
  169. collector_error("%s has no lines.", procfile_filename(ff));
  170. fail_count++;
  171. continue;
  172. }
  173. if (do_some) {
  174. proc_pressure_do_resource_some(ff, i);
  175. update_pressure_charts(&resources[i].some);
  176. }
  177. if (do_full && lines > 2) {
  178. proc_pressure_do_resource_full(ff, i);
  179. update_pressure_charts(&resources[i].full);
  180. }
  181. }
  182. if (PRESSURE_NUM_RESOURCES == fail_count) {
  183. return 1;
  184. }
  185. return 0;
  186. }