proc_pressure.c 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "plugin_proc.h"
  // Name of this module; it is part of the configuration section name below
  // and is passed to rrdset_create_localhost() when the charts are created.
  #define PLUGIN_PROC_MODULE_PRESSURE_NAME "/proc/pressure"

  // Configuration section holding this module's settings:
  // "plugin:" + PLUGIN_PROC_CONFIG_NAME + ":" + the module name.
  #define CONFIG_SECTION_PLUGIN_PROC_PRESSURE \
      "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_PRESSURE_NAME

  // linux calculates this every 2 seconds, see kernel/sched/psi.c PSI_FREQ
  #define MIN_PRESSURE_UPDATE_EVERY 2

  // Update interval (seconds) given to newly created charts. It is set on
  // every call of do_proc_pressure() to the requested interval, raised to
  // MIN_PRESSURE_UPDATE_EVERY when the request is smaller.
  static int pressure_update_every = 0;
  8. static struct pressure resources[PRESSURE_NUM_RESOURCES] = {
  9. {
  10. .some =
  11. {.share_time = {.id = "cpu_some_pressure", .title = "CPU some pressure"},
  12. .total_time = {.id = "cpu_some_pressure_stall_time", .title = "CPU some pressure stall time"}},
  13. .full =
  14. {.share_time = {.id = "cpu_full_pressure", .title = "CPU full pressure"},
  15. .total_time = {.id = "cpu_full_pressure_stall_time", .title = "CPU full pressure stall time"}},
  16. },
  17. {
  18. .some =
  19. {.share_time = {.id = "memory_some_pressure", .title = "Memory some pressure"},
  20. .total_time = {.id = "memory_some_pressure_stall_time", .title = "Memory some pressure stall time"}},
  21. .full =
  22. {.share_time = {.id = "memory_full_pressure", .title = "Memory full pressure"},
  23. .total_time = {.id = "memory_full_pressure_stall_time", .title = "Memory full pressure stall time"}},
  24. },
  25. {
  26. .some =
  27. {.share_time = {.id = "io_some_pressure", .title = "I/O some pressure"},
  28. .total_time = {.id = "io_some_pressure_stall_time", .title = "I/O some pressure stall time"}},
  29. .full =
  30. {.share_time = {.id = "io_full_pressure", .title = "I/O full pressure"},
  31. .total_time = {.id = "io_full_pressure_stall_time", .title = "I/O full pressure stall time"}},
  32. },
  33. };
  34. static struct resource_info {
  35. procfile *pf;
  36. const char *name; // metric file name
  37. const char *family; // webui section name
  38. int section_priority;
  39. } resource_info[PRESSURE_NUM_RESOURCES] = {
  40. { .name = "cpu", .family = "cpu", .section_priority = NETDATA_CHART_PRIO_SYSTEM_CPU },
  41. { .name = "memory", .family = "ram", .section_priority = NETDATA_CHART_PRIO_SYSTEM_RAM },
  42. { .name = "io", .family = "disk", .section_priority = NETDATA_CHART_PRIO_SYSTEM_IO },
  43. };
  44. void update_pressure_charts(struct pressure_charts *pcs) {
  45. if (pcs->share_time.st) {
  46. rrddim_set_by_pointer(
  47. pcs->share_time.st, pcs->share_time.rd10, (collected_number)(pcs->share_time.value10 * 100));
  48. rrddim_set_by_pointer(
  49. pcs->share_time.st, pcs->share_time.rd60, (collected_number)(pcs->share_time.value60 * 100));
  50. rrddim_set_by_pointer(
  51. pcs->share_time.st, pcs->share_time.rd300, (collected_number)(pcs->share_time.value300 * 100));
  52. rrdset_done(pcs->share_time.st);
  53. }
  54. if (pcs->total_time.st) {
  55. rrddim_set_by_pointer(
  56. pcs->total_time.st, pcs->total_time.rdtotal, (collected_number)(pcs->total_time.value_total));
  57. rrdset_done(pcs->total_time.st);
  58. }
  59. }
  60. static void proc_pressure_do_resource(procfile *ff, int res_idx, int some) {
  61. struct pressure_charts *pcs;
  62. struct resource_info ri;
  63. pcs = some ? &resources[res_idx].some : &resources[res_idx].full;
  64. ri = resource_info[res_idx];
  65. if (unlikely(!pcs->share_time.st)) {
  66. pcs->share_time.st = rrdset_create_localhost(
  67. "system",
  68. pcs->share_time.id,
  69. NULL,
  70. ri.family,
  71. NULL,
  72. pcs->share_time.title,
  73. "percentage",
  74. PLUGIN_PROC_NAME,
  75. PLUGIN_PROC_MODULE_PRESSURE_NAME,
  76. ri.section_priority + (some ? 40 : 50),
  77. pressure_update_every,
  78. RRDSET_TYPE_LINE);
  79. pcs->share_time.rd10 =
  80. rrddim_add(pcs->share_time.st, some ? "some 10" : "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  81. pcs->share_time.rd60 =
  82. rrddim_add(pcs->share_time.st, some ? "some 60" : "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  83. pcs->share_time.rd300 =
  84. rrddim_add(pcs->share_time.st, some ? "some 300" : "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
  85. } else {
  86. rrdset_next(pcs->share_time.st);
  87. }
  88. pcs->share_time.value10 = strtod(procfile_lineword(ff, some ? 0 : 1, 2), NULL);
  89. pcs->share_time.value60 = strtod(procfile_lineword(ff, some ? 0 : 1, 4), NULL);
  90. pcs->share_time.value300 = strtod(procfile_lineword(ff, some ? 0 : 1, 6), NULL);
  91. if (unlikely(!pcs->total_time.st)) {
  92. pcs->total_time.st = rrdset_create_localhost(
  93. "system",
  94. pcs->total_time.id,
  95. NULL,
  96. ri.family,
  97. NULL,
  98. pcs->total_time.title,
  99. "ms",
  100. PLUGIN_PROC_NAME,
  101. PLUGIN_PROC_MODULE_PRESSURE_NAME,
  102. ri.section_priority + (some ? 45 : 55),
  103. pressure_update_every,
  104. RRDSET_TYPE_LINE);
  105. pcs->total_time.rdtotal = rrddim_add(pcs->total_time.st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  106. } else {
  107. rrdset_next(pcs->total_time.st);
  108. }
  109. pcs->total_time.value_total = str2ull(procfile_lineword(ff, some ? 0 : 1, 8)) / 1000;
  110. }
  111. static void proc_pressure_do_resource_some(procfile *ff, int res_idx) {
  112. proc_pressure_do_resource(ff, res_idx, 1);
  113. }
  114. static void proc_pressure_do_resource_full(procfile *ff, int res_idx) {
  115. proc_pressure_do_resource(ff, res_idx, 0);
  116. }
  117. int do_proc_pressure(int update_every, usec_t dt) {
  118. int fail_count = 0;
  119. int i;
  120. static usec_t next_pressure_dt = 0;
  121. static char *base_path = NULL;
  122. update_every = (update_every < MIN_PRESSURE_UPDATE_EVERY) ? MIN_PRESSURE_UPDATE_EVERY : update_every;
  123. pressure_update_every = update_every;
  124. if (next_pressure_dt <= dt) {
  125. next_pressure_dt = update_every * USEC_PER_SEC;
  126. } else {
  127. next_pressure_dt -= dt;
  128. return 0;
  129. }
  130. if (unlikely(!base_path)) {
  131. base_path = config_get(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, "base path of pressure metrics", "/proc/pressure");
  132. }
  133. for (i = 0; i < PRESSURE_NUM_RESOURCES; i++) {
  134. procfile *ff = resource_info[i].pf;
  135. int do_some = resources[i].some.enabled, do_full = resources[i].full.enabled;
  136. if (unlikely(!ff)) {
  137. char filename[FILENAME_MAX + 1];
  138. char config_key[CONFIG_MAX_NAME + 1];
  139. snprintfz(filename
  140. , FILENAME_MAX
  141. , "%s%s/%s"
  142. , netdata_configured_host_prefix
  143. , base_path
  144. , resource_info[i].name);
  145. snprintfz(config_key, CONFIG_MAX_NAME, "enable %s some pressure", resource_info[i].name);
  146. do_some = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
  147. resources[i].some.enabled = do_some;
  148. snprintfz(config_key, CONFIG_MAX_NAME, "enable %s full pressure", resource_info[i].name);
  149. do_full = config_get_boolean(CONFIG_SECTION_PLUGIN_PROC_PRESSURE, config_key, CONFIG_BOOLEAN_YES);
  150. resources[i].full.enabled = do_full;
  151. ff = procfile_open(filename, " =", PROCFILE_FLAG_DEFAULT);
  152. if (unlikely(!ff)) {
  153. error("Cannot read pressure information from %s.", filename);
  154. fail_count++;
  155. continue;
  156. }
  157. }
  158. ff = procfile_readall(ff);
  159. resource_info[i].pf = ff;
  160. if (unlikely(!ff)) {
  161. fail_count++;
  162. continue;
  163. }
  164. size_t lines = procfile_lines(ff);
  165. if (unlikely(lines < 1)) {
  166. error("%s has no lines.", procfile_filename(ff));
  167. fail_count++;
  168. continue;
  169. }
  170. if (do_some) {
  171. proc_pressure_do_resource_some(ff, i);
  172. update_pressure_charts(&resources[i].some);
  173. }
  174. if (do_full && lines > 2) {
  175. proc_pressure_do_resource_full(ff, i);
  176. update_pressure_charts(&resources[i].full);
  177. }
  178. }
  179. if (PRESSURE_NUM_RESOURCES == fail_count) {
  180. return 1;
  181. }
  182. return 0;
  183. }