proc_stat.c 44 KB


  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "plugin_proc.h"
  3. #define PLUGIN_PROC_MODULE_STAT_NAME "/proc/stat"
  4. struct per_core_single_number_file {
  5. unsigned char found:1;
  6. const char *filename;
  7. int fd;
  8. collected_number value;
  9. RRDDIM *rd;
  10. };
  11. struct last_ticks {
  12. collected_number frequency;
  13. collected_number ticks;
  14. };
  15. // This is an extension of struct per_core_single_number_file at CPU_FREQ_INDEX.
  16. // Either scaling_cur_freq or time_in_state file is used at one time.
  17. struct per_core_time_in_state_file {
  18. const char *filename;
  19. procfile *ff;
  20. size_t last_ticks_len;
  21. struct last_ticks *last_ticks;
  22. };
  // Slots of cpu_chart.files[]; PER_CORE_FILES is the number of slots.
  #define CORE_THROTTLE_COUNT_INDEX    0
  #define PACKAGE_THROTTLE_COUNT_INDEX 1
  #define CPU_FREQ_INDEX               2
  #define PER_CORE_FILES               3
  27. struct cpu_chart {
  28. const char *id;
  29. RRDSET *st;
  30. RRDDIM *rd_user;
  31. RRDDIM *rd_nice;
  32. RRDDIM *rd_system;
  33. RRDDIM *rd_idle;
  34. RRDDIM *rd_iowait;
  35. RRDDIM *rd_irq;
  36. RRDDIM *rd_softirq;
  37. RRDDIM *rd_steal;
  38. RRDDIM *rd_guest;
  39. RRDDIM *rd_guest_nice;
  40. bool per_core_files_found;
  41. struct per_core_single_number_file files[PER_CORE_FILES];
  42. struct per_core_time_in_state_file time_in_state_files;
  43. };
  44. static int keep_per_core_fds_open = CONFIG_BOOLEAN_YES;
  45. static int keep_cpuidle_fds_open = CONFIG_BOOLEAN_YES;
  46. static int read_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index) {
  47. char buf[50 + 1];
  48. size_t x, files_read = 0, files_nonzero = 0;
  49. for(x = 0; x < len ; x++) {
  50. struct per_core_single_number_file *f = &all_cpu_charts[x].files[index];
  51. f->found = 0;
  52. if(unlikely(!f->filename))
  53. continue;
  54. if(unlikely(f->fd == -1)) {
  55. f->fd = open(f->filename, O_RDONLY);
  56. if (unlikely(f->fd == -1)) {
  57. collector_error("Cannot open file '%s'", f->filename);
  58. continue;
  59. }
  60. }
  61. ssize_t ret = read(f->fd, buf, 50);
  62. if(unlikely(ret < 0)) {
  63. // cannot read that file
  64. collector_error("Cannot read file '%s'", f->filename);
  65. close(f->fd);
  66. f->fd = -1;
  67. continue;
  68. }
  69. else {
  70. // successful read
  71. // terminate the buffer
  72. buf[ret] = '\0';
  73. if(unlikely(keep_per_core_fds_open != CONFIG_BOOLEAN_YES)) {
  74. close(f->fd);
  75. f->fd = -1;
  76. }
  77. else if(lseek(f->fd, 0, SEEK_SET) == -1) {
  78. collector_error("Cannot seek in file '%s'", f->filename);
  79. close(f->fd);
  80. f->fd = -1;
  81. }
  82. }
  83. files_read++;
  84. f->found = 1;
  85. f->value = str2ll(buf, NULL);
  86. if(likely(f->value != 0))
  87. files_nonzero++;
  88. }
  89. if(files_read == 0)
  90. return -1;
  91. if(files_nonzero == 0)
  92. return 0;
  93. return (int)files_nonzero;
  94. }
  95. static int read_per_core_time_in_state_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index) {
  96. size_t x, files_read = 0, files_nonzero = 0;
  97. for(x = 0; x < len ; x++) {
  98. struct per_core_single_number_file *f = &all_cpu_charts[x].files[index];
  99. struct per_core_time_in_state_file *tsf = &all_cpu_charts[x].time_in_state_files;
  100. f->found = 0;
  101. if(unlikely(!tsf->filename))
  102. continue;
  103. if(unlikely(!tsf->ff)) {
  104. tsf->ff = procfile_open(tsf->filename, " \t:", PROCFILE_FLAG_DEFAULT);
  105. if(unlikely(!tsf->ff))
  106. {
  107. collector_error("Cannot open file '%s'", tsf->filename);
  108. continue;
  109. }
  110. }
  111. tsf->ff = procfile_readall(tsf->ff);
  112. if(unlikely(!tsf->ff)) {
  113. collector_error("Cannot read file '%s'", tsf->filename);
  114. procfile_close(tsf->ff);
  115. tsf->ff = NULL;
  116. continue;
  117. }
  118. else {
  119. // successful read
  120. size_t lines = procfile_lines(tsf->ff), l;
  121. size_t words;
  122. unsigned long long total_ticks_since_last = 0, avg_freq = 0;
  123. // Check if there is at least one frequency in time_in_state
  124. if (procfile_word(tsf->ff, 0)[0] == '\0') {
  125. if(unlikely(keep_per_core_fds_open != CONFIG_BOOLEAN_YES)) {
  126. procfile_close(tsf->ff);
  127. tsf->ff = NULL;
  128. }
  129. // TODO: Is there a better way to avoid spikes than calculating the average over
  130. // the whole period under schedutil governor?
  131. // freez(tsf->last_ticks);
  132. // tsf->last_ticks = NULL;
  133. // tsf->last_ticks_len = 0;
  134. continue;
  135. }
  136. if (unlikely(tsf->last_ticks_len < lines || tsf->last_ticks == NULL)) {
  137. tsf->last_ticks = reallocz(tsf->last_ticks, sizeof(struct last_ticks) * lines);
  138. memset(tsf->last_ticks, 0, sizeof(struct last_ticks) * lines);
  139. tsf->last_ticks_len = lines;
  140. }
  141. f->value = 0;
  142. for(l = 0; l < lines - 1 ;l++) {
  143. unsigned long long frequency = 0, ticks = 0, ticks_since_last = 0;
  144. words = procfile_linewords(tsf->ff, l);
  145. if(unlikely(words < 2)) {
  146. collector_error("Cannot read time_in_state line. Expected 2 params, read %zu.", words);
  147. continue;
  148. }
  149. frequency = str2ull(procfile_lineword(tsf->ff, l, 0), NULL);
  150. ticks = str2ull(procfile_lineword(tsf->ff, l, 1), NULL);
  151. // It is assumed that frequencies are static and sorted
  152. ticks_since_last = ticks - tsf->last_ticks[l].ticks;
  153. tsf->last_ticks[l].frequency = frequency;
  154. tsf->last_ticks[l].ticks = ticks;
  155. total_ticks_since_last += ticks_since_last;
  156. avg_freq += frequency * ticks_since_last;
  157. }
  158. if (likely(total_ticks_since_last)) {
  159. avg_freq /= total_ticks_since_last;
  160. f->value = avg_freq;
  161. }
  162. if(unlikely(keep_per_core_fds_open != CONFIG_BOOLEAN_YES)) {
  163. procfile_close(tsf->ff);
  164. tsf->ff = NULL;
  165. }
  166. }
  167. files_read++;
  168. f->found = 1;
  169. if(likely(f->value != 0))
  170. files_nonzero++;
  171. }
  172. if(unlikely(files_read == 0))
  173. return -1;
  174. if(unlikely(files_nonzero == 0))
  175. return 0;
  176. return (int)files_nonzero;
  177. }
  178. static void chart_per_core_files(struct cpu_chart *all_cpu_charts, size_t len, size_t index, RRDSET *st, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm) {
  179. size_t x;
  180. for(x = 0; x < len ; x++) {
  181. struct per_core_single_number_file *f = &all_cpu_charts[x].files[index];
  182. if(unlikely(!f->found))
  183. continue;
  184. if(unlikely(!f->rd))
  185. f->rd = rrddim_add(st, all_cpu_charts[x].id, NULL, multiplier, divisor, algorithm);
  186. rrddim_set_by_pointer(st, f->rd, f->value);
  187. }
  188. }
  189. struct cpuidle_state {
  190. char *name;
  191. char *time_filename;
  192. int time_fd;
  193. collected_number value;
  194. RRDDIM *rd;
  195. };
  196. struct per_core_cpuidle_chart {
  197. RRDSET *st;
  198. RRDDIM *active_time_rd;
  199. collected_number active_time;
  200. collected_number last_active_time;
  201. struct cpuidle_state *cpuidle_state;
  202. size_t cpuidle_state_len;
  203. int rescan_cpu_states;
  204. };
  /*
   * Thread entry point: pin the calling thread to the core whose number is
   * stored in the int that `core` points to, then bump a counter.
   *
   * Affinity failures are logged for the first 8 occurrences, followed by a
   * single notice that further errors are suppressed. Always returns NULL.
   */
  static void* wake_cpu_thread(void* core) {
      static size_t cpu_wakeups = 0;
      static int errors = 0;

      const int target_core = *(int*)core;

      cpu_set_t affinity;
      CPU_ZERO(&affinity);
      CPU_SET(target_core, &affinity);

      if(unlikely(pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &affinity))) {
          if(unlikely(errors < 9)) {
              if(errors < 8)
                  collector_error("Cannot set CPU affinity for core %d", target_core);
              else
                  collector_error("CPU affinity errors are disabled");

              errors++;
          }
      }

      // Make the CPU core do something to force it to update its idle counters
      cpu_wakeups++;

      return NULL;
  }
  227. static int read_schedstat(char *schedstat_filename, struct per_core_cpuidle_chart **cpuidle_charts_address, size_t *schedstat_cores_found) {
  228. static size_t cpuidle_charts_len = 0;
  229. static procfile *ff = NULL;
  230. struct per_core_cpuidle_chart *cpuidle_charts = *cpuidle_charts_address;
  231. size_t cores_found = 0;
  232. if(unlikely(!ff)) {
  233. ff = procfile_open(schedstat_filename, " \t:", PROCFILE_FLAG_DEFAULT);
  234. if(unlikely(!ff)) return 1;
  235. }
  236. ff = procfile_readall(ff);
  237. if(unlikely(!ff)) return 1;
  238. size_t lines = procfile_lines(ff), l;
  239. size_t words;
  240. for(l = 0; l < lines ;l++) {
  241. char *row_key = procfile_lineword(ff, l, 0);
  242. // faster strncmp(row_key, "cpu", 3) == 0
  243. if(likely(row_key[0] == 'c' && row_key[1] == 'p' && row_key[2] == 'u')) {
  244. words = procfile_linewords(ff, l);
  245. if(unlikely(words < 10)) {
  246. collector_error("Cannot read /proc/schedstat cpu line. Expected 9 params, read %zu.", words);
  247. return 1;
  248. }
  249. cores_found++;
  250. size_t core = str2ul(&row_key[3]);
  251. if(unlikely(core >= cores_found)) {
  252. collector_error("Core %zu found but no more than %zu cores were expected.", core, cores_found);
  253. return 1;
  254. }
  255. if(unlikely(cpuidle_charts_len < cores_found)) {
  256. cpuidle_charts = reallocz(cpuidle_charts, sizeof(struct per_core_cpuidle_chart) * cores_found);
  257. *cpuidle_charts_address = cpuidle_charts;
  258. memset(cpuidle_charts + cpuidle_charts_len, 0, sizeof(struct per_core_cpuidle_chart) * (cores_found - cpuidle_charts_len));
  259. cpuidle_charts_len = cores_found;
  260. }
  261. cpuidle_charts[core].active_time = str2ull(procfile_lineword(ff, l, 7), NULL) / 1000;
  262. }
  263. }
  264. *schedstat_cores_found = cores_found;
  265. return 0;
  266. }
  267. static int read_one_state(char *buf, const char *filename, int *fd) {
  268. ssize_t ret = read(*fd, buf, 50);
  269. if(unlikely(ret <= 0)) {
  270. // cannot read that file
  271. collector_error("Cannot read file '%s'", filename);
  272. close(*fd);
  273. *fd = -1;
  274. return 0;
  275. }
  276. else {
  277. // successful read
  278. // terminate the buffer
  279. buf[ret - 1] = '\0';
  280. if(unlikely(keep_cpuidle_fds_open != CONFIG_BOOLEAN_YES)) {
  281. close(*fd);
  282. *fd = -1;
  283. }
  284. else if(lseek(*fd, 0, SEEK_SET) == -1) {
  285. collector_error("Cannot seek in file '%s'", filename);
  286. close(*fd);
  287. *fd = -1;
  288. }
  289. }
  290. return 1;
  291. }
  292. static int read_cpuidle_states(char *cpuidle_name_filename , char *cpuidle_time_filename, struct per_core_cpuidle_chart *cpuidle_charts, size_t core) {
  293. char filename[FILENAME_MAX + 1];
  294. static char next_state_filename[FILENAME_MAX + 1];
  295. struct stat stbuf;
  296. struct per_core_cpuidle_chart *cc = &cpuidle_charts[core];
  297. size_t state;
  298. if(unlikely(!cc->cpuidle_state_len || cc->rescan_cpu_states)) {
  299. int state_file_found = 1; // check at least one state
  300. if(cc->cpuidle_state_len) {
  301. for(state = 0; state < cc->cpuidle_state_len; state++) {
  302. freez(cc->cpuidle_state[state].name);
  303. freez(cc->cpuidle_state[state].time_filename);
  304. close(cc->cpuidle_state[state].time_fd);
  305. cc->cpuidle_state[state].time_fd = -1;
  306. }
  307. freez(cc->cpuidle_state);
  308. cc->cpuidle_state = NULL;
  309. cc->cpuidle_state_len = 0;
  310. cc->active_time_rd = NULL;
  311. cc->st = NULL;
  312. }
  313. while(likely(state_file_found)) {
  314. snprintfz(filename, FILENAME_MAX, cpuidle_name_filename, core, cc->cpuidle_state_len);
  315. if (stat(filename, &stbuf) == 0)
  316. cc->cpuidle_state_len++;
  317. else
  318. state_file_found = 0;
  319. }
  320. snprintfz(next_state_filename, FILENAME_MAX, cpuidle_name_filename, core, cc->cpuidle_state_len);
  321. if(likely(cc->cpuidle_state_len))
  322. cc->cpuidle_state = callocz(cc->cpuidle_state_len, sizeof(struct cpuidle_state));
  323. for(state = 0; state < cc->cpuidle_state_len; state++) {
  324. char name_buf[50 + 1];
  325. snprintfz(filename, FILENAME_MAX, cpuidle_name_filename, core, state);
  326. int fd = open(filename, O_RDONLY, 0666);
  327. if(unlikely(fd == -1)) {
  328. collector_error("Cannot open file '%s'", filename);
  329. cc->rescan_cpu_states = 1;
  330. return 1;
  331. }
  332. ssize_t r = read(fd, name_buf, 50);
  333. if(unlikely(r < 1)) {
  334. collector_error("Cannot read file '%s'", filename);
  335. close(fd);
  336. cc->rescan_cpu_states = 1;
  337. return 1;
  338. }
  339. name_buf[r - 1] = '\0'; // erase extra character
  340. cc->cpuidle_state[state].name = strdupz(trim(name_buf));
  341. close(fd);
  342. snprintfz(filename, FILENAME_MAX, cpuidle_time_filename, core, state);
  343. cc->cpuidle_state[state].time_filename = strdupz(filename);
  344. cc->cpuidle_state[state].time_fd = -1;
  345. }
  346. cc->rescan_cpu_states = 0;
  347. }
  348. for(state = 0; state < cc->cpuidle_state_len; state++) {
  349. struct cpuidle_state *cs = &cc->cpuidle_state[state];
  350. if(unlikely(cs->time_fd == -1)) {
  351. cs->time_fd = open(cs->time_filename, O_RDONLY);
  352. if (unlikely(cs->time_fd == -1)) {
  353. collector_error("Cannot open file '%s'", cs->time_filename);
  354. cc->rescan_cpu_states = 1;
  355. return 1;
  356. }
  357. }
  358. char time_buf[50 + 1];
  359. if(likely(read_one_state(time_buf, cs->time_filename, &cs->time_fd))) {
  360. cs->value = str2ll(time_buf, NULL);
  361. }
  362. else {
  363. cc->rescan_cpu_states = 1;
  364. return 1;
  365. }
  366. }
  367. // check if the number of states was increased
  368. if(unlikely(stat(next_state_filename, &stbuf) == 0)) {
  369. cc->rescan_cpu_states = 1;
  370. return 1;
  371. }
  372. return 0;
  373. }
  374. int do_proc_stat(int update_every, usec_t dt) {
  375. (void)dt;
  376. static struct cpu_chart *all_cpu_charts = NULL;
  377. static size_t all_cpu_charts_size = 0;
  378. static procfile *ff = NULL;
  379. static int do_cpu = -1, do_cpu_cores = -1, do_interrupts = -1, do_context = -1, do_forks = -1, do_processes = -1,
  380. do_core_throttle_count = -1, do_package_throttle_count = -1, do_cpu_freq = -1, do_cpuidle = -1;
  381. static uint32_t hash_intr, hash_ctxt, hash_processes, hash_procs_running, hash_procs_blocked;
  382. static char *core_throttle_count_filename = NULL, *package_throttle_count_filename = NULL, *scaling_cur_freq_filename = NULL,
  383. *time_in_state_filename = NULL, *schedstat_filename = NULL, *cpuidle_name_filename = NULL, *cpuidle_time_filename = NULL;
  384. static const RRDVAR_ACQUIRED *cpus_var = NULL;
  385. static int accurate_freq_avail = 0, accurate_freq_is_used = 0;
  386. size_t cores_found = (size_t)get_system_cpus();
  387. if(unlikely(do_cpu == -1)) {
  388. do_cpu = config_get_boolean("plugin:proc:/proc/stat", "cpu utilization", CONFIG_BOOLEAN_YES);
  389. do_cpu_cores = config_get_boolean("plugin:proc:/proc/stat", "per cpu core utilization", CONFIG_BOOLEAN_NO);
  390. do_interrupts = config_get_boolean("plugin:proc:/proc/stat", "cpu interrupts", CONFIG_BOOLEAN_YES);
  391. do_context = config_get_boolean("plugin:proc:/proc/stat", "context switches", CONFIG_BOOLEAN_YES);
  392. do_forks = config_get_boolean("plugin:proc:/proc/stat", "processes started", CONFIG_BOOLEAN_YES);
  393. do_processes = config_get_boolean("plugin:proc:/proc/stat", "processes running", CONFIG_BOOLEAN_YES);
  394. // give sane defaults based on the number of processors
  395. if(unlikely(get_system_cpus() > 128)) {
  396. // the system has too many processors
  397. keep_per_core_fds_open = CONFIG_BOOLEAN_NO;
  398. do_core_throttle_count = CONFIG_BOOLEAN_NO;
  399. do_package_throttle_count = CONFIG_BOOLEAN_NO;
  400. do_cpu_freq = CONFIG_BOOLEAN_NO;
  401. do_cpuidle = CONFIG_BOOLEAN_NO;
  402. }
  403. else {
  404. // the system has a reasonable number of processors
  405. keep_per_core_fds_open = CONFIG_BOOLEAN_YES;
  406. do_core_throttle_count = CONFIG_BOOLEAN_AUTO;
  407. do_package_throttle_count = CONFIG_BOOLEAN_NO;
  408. do_cpu_freq = CONFIG_BOOLEAN_YES;
  409. do_cpuidle = CONFIG_BOOLEAN_NO;
  410. }
  411. if(unlikely(get_system_cpus() > 24)) {
  412. // the system has too many processors
  413. keep_cpuidle_fds_open = CONFIG_BOOLEAN_NO;
  414. }
  415. else {
  416. // the system has a reasonable number of processors
  417. keep_cpuidle_fds_open = CONFIG_BOOLEAN_YES;
  418. }
  419. keep_per_core_fds_open = config_get_boolean("plugin:proc:/proc/stat", "keep per core files open", keep_per_core_fds_open);
  420. keep_cpuidle_fds_open = config_get_boolean("plugin:proc:/proc/stat", "keep cpuidle files open", keep_cpuidle_fds_open);
  421. do_core_throttle_count = config_get_boolean_ondemand("plugin:proc:/proc/stat", "core_throttle_count", do_core_throttle_count);
  422. do_package_throttle_count = config_get_boolean_ondemand("plugin:proc:/proc/stat", "package_throttle_count", do_package_throttle_count);
  423. do_cpu_freq = config_get_boolean_ondemand("plugin:proc:/proc/stat", "cpu frequency", do_cpu_freq);
  424. do_cpuidle = config_get_boolean_ondemand("plugin:proc:/proc/stat", "cpu idle states", do_cpuidle);
  425. hash_intr = simple_hash("intr");
  426. hash_ctxt = simple_hash("ctxt");
  427. hash_processes = simple_hash("processes");
  428. hash_procs_running = simple_hash("procs_running");
  429. hash_procs_blocked = simple_hash("procs_blocked");
  430. char filename[FILENAME_MAX + 1];
  431. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/thermal_throttle/core_throttle_count");
  432. core_throttle_count_filename = config_get("plugin:proc:/proc/stat", "core_throttle_count filename to monitor", filename);
  433. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/thermal_throttle/package_throttle_count");
  434. package_throttle_count_filename = config_get("plugin:proc:/proc/stat", "package_throttle_count filename to monitor", filename);
  435. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/cpufreq/scaling_cur_freq");
  436. scaling_cur_freq_filename = config_get("plugin:proc:/proc/stat", "scaling_cur_freq filename to monitor", filename);
  437. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/%s/cpufreq/stats/time_in_state");
  438. time_in_state_filename = config_get("plugin:proc:/proc/stat", "time_in_state filename to monitor", filename);
  439. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/schedstat");
  440. schedstat_filename = config_get("plugin:proc:/proc/stat", "schedstat filename to monitor", filename);
  441. if(do_cpuidle != CONFIG_BOOLEAN_NO) {
  442. struct stat stbuf;
  443. if (stat(schedstat_filename, &stbuf))
  444. do_cpuidle = CONFIG_BOOLEAN_NO;
  445. }
  446. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/name");
  447. cpuidle_name_filename = config_get("plugin:proc:/proc/stat", "cpuidle name filename to monitor", filename);
  448. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/cpu/cpu%zu/cpuidle/state%zu/time");
  449. cpuidle_time_filename = config_get("plugin:proc:/proc/stat", "cpuidle time filename to monitor", filename);
  450. }
  451. if(unlikely(!ff)) {
  452. char filename[FILENAME_MAX + 1];
  453. snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/stat");
  454. ff = procfile_open(config_get("plugin:proc:/proc/stat", "filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT);
  455. if(unlikely(!ff)) return 1;
  456. }
  457. ff = procfile_readall(ff);
  458. if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time
  459. size_t lines = procfile_lines(ff), l;
  460. size_t words;
  461. unsigned long long processes = 0, running = 0 , blocked = 0;
  462. for(l = 0; l < lines ;l++) {
  463. char *row_key = procfile_lineword(ff, l, 0);
  464. uint32_t hash = simple_hash(row_key);
  465. // faster strncmp(row_key, "cpu", 3) == 0
  466. if(likely(row_key[0] == 'c' && row_key[1] == 'p' && row_key[2] == 'u')) {
  467. words = procfile_linewords(ff, l);
  468. if(unlikely(words < 9)) {
  469. collector_error("Cannot read /proc/stat cpu line. Expected 9 params, read %zu.", words);
  470. continue;
  471. }
  472. size_t core = (row_key[3] == '\0') ? 0 : str2ul(&row_key[3]) + 1;
  473. if (likely(core > 0))
  474. cores_found = core;
  475. bool do_any_core_metric = do_cpu_cores || do_core_throttle_count || do_cpu_freq || do_cpuidle;
  476. if (likely((core == 0 && do_cpu) || (core > 0 && do_any_core_metric))) {
  477. if (unlikely(core >= all_cpu_charts_size)) {
  478. size_t old_cpu_charts_size = all_cpu_charts_size;
  479. all_cpu_charts_size = core + 1;
  480. all_cpu_charts = reallocz(all_cpu_charts, sizeof(struct cpu_chart) * all_cpu_charts_size);
  481. memset(&all_cpu_charts[old_cpu_charts_size], 0, sizeof(struct cpu_chart) * (all_cpu_charts_size - old_cpu_charts_size));
  482. }
  483. struct cpu_chart *cpu_chart = &all_cpu_charts[core];
  484. if (unlikely(!cpu_chart->id))
  485. cpu_chart->id = strdupz(row_key);
  486. if (core > 0 && !cpu_chart->per_core_files_found) {
  487. cpu_chart->per_core_files_found = true;
  488. char filename[FILENAME_MAX + 1];
  489. struct stat stbuf;
  490. if (do_core_throttle_count != CONFIG_BOOLEAN_NO) {
  491. snprintfz(filename, FILENAME_MAX, core_throttle_count_filename, cpu_chart->id);
  492. if (stat(filename, &stbuf) == 0) {
  493. cpu_chart->files[CORE_THROTTLE_COUNT_INDEX].filename = strdupz(filename);
  494. cpu_chart->files[CORE_THROTTLE_COUNT_INDEX].fd = -1;
  495. do_core_throttle_count = CONFIG_BOOLEAN_YES;
  496. }
  497. }
  498. if (do_package_throttle_count != CONFIG_BOOLEAN_NO) {
  499. snprintfz(filename, FILENAME_MAX, package_throttle_count_filename, cpu_chart->id);
  500. if (stat(filename, &stbuf) == 0) {
  501. cpu_chart->files[PACKAGE_THROTTLE_COUNT_INDEX].filename = strdupz(filename);
  502. cpu_chart->files[PACKAGE_THROTTLE_COUNT_INDEX].fd = -1;
  503. do_package_throttle_count = CONFIG_BOOLEAN_YES;
  504. }
  505. }
  506. if (do_cpu_freq != CONFIG_BOOLEAN_NO) {
  507. snprintfz(filename, FILENAME_MAX, scaling_cur_freq_filename, cpu_chart->id);
  508. if (stat(filename, &stbuf) == 0) {
  509. cpu_chart->files[CPU_FREQ_INDEX].filename = strdupz(filename);
  510. cpu_chart->files[CPU_FREQ_INDEX].fd = -1;
  511. do_cpu_freq = CONFIG_BOOLEAN_YES;
  512. }
  513. snprintfz(filename, FILENAME_MAX, time_in_state_filename, cpu_chart->id);
  514. if (stat(filename, &stbuf) == 0) {
  515. cpu_chart->time_in_state_files.filename = strdupz(filename);
  516. cpu_chart->time_in_state_files.ff = NULL;
  517. do_cpu_freq = CONFIG_BOOLEAN_YES;
  518. accurate_freq_avail = 1;
  519. }
  520. }
  521. }
  522. }
  523. if(likely((core == 0 && do_cpu) || (core > 0 && do_cpu_cores))) {
  524. unsigned long long user = 0, nice = 0, system = 0, idle = 0, iowait = 0, irq = 0, softirq = 0, steal = 0, guest = 0, guest_nice = 0;
  525. user = str2ull(procfile_lineword(ff, l, 1), NULL);
  526. nice = str2ull(procfile_lineword(ff, l, 2), NULL);
  527. system = str2ull(procfile_lineword(ff, l, 3), NULL);
  528. idle = str2ull(procfile_lineword(ff, l, 4), NULL);
  529. iowait = str2ull(procfile_lineword(ff, l, 5), NULL);
  530. irq = str2ull(procfile_lineword(ff, l, 6), NULL);
  531. softirq = str2ull(procfile_lineword(ff, l, 7), NULL);
  532. steal = str2ull(procfile_lineword(ff, l, 8), NULL);
  533. guest = str2ull(procfile_lineword(ff, l, 9), NULL);
  534. user -= guest;
  535. guest_nice = str2ull(procfile_lineword(ff, l, 10), NULL);
  536. nice -= guest_nice;
  537. char *title, *type, *context, *family;
  538. long priority;
  539. struct cpu_chart *cpu_chart = &all_cpu_charts[core];
  540. char *id = row_key;
  541. if(unlikely(!cpu_chart->st)) {
  542. if(unlikely(core == 0)) {
  543. title = "Total CPU utilization";
  544. type = "system";
  545. context = "system.cpu";
  546. family = id;
  547. priority = NETDATA_CHART_PRIO_SYSTEM_CPU;
  548. }
  549. else {
  550. title = "Core utilization";
  551. type = "cpu";
  552. context = "cpu.cpu";
  553. family = "utilization";
  554. priority = NETDATA_CHART_PRIO_CPU_PER_CORE;
  555. }
  556. cpu_chart->st = rrdset_create_localhost(
  557. type
  558. , id
  559. , NULL
  560. , family
  561. , context
  562. , title
  563. , "percentage"
  564. , PLUGIN_PROC_NAME
  565. , PLUGIN_PROC_MODULE_STAT_NAME
  566. , priority + core
  567. , update_every
  568. , RRDSET_TYPE_STACKED
  569. );
  570. long multiplier = 1;
  571. long divisor = 1; // sysconf(_SC_CLK_TCK);
  572. cpu_chart->rd_guest_nice = rrddim_add(cpu_chart->st, "guest_nice", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  573. cpu_chart->rd_guest = rrddim_add(cpu_chart->st, "guest", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  574. cpu_chart->rd_steal = rrddim_add(cpu_chart->st, "steal", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  575. cpu_chart->rd_softirq = rrddim_add(cpu_chart->st, "softirq", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  576. cpu_chart->rd_irq = rrddim_add(cpu_chart->st, "irq", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  577. cpu_chart->rd_user = rrddim_add(cpu_chart->st, "user", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  578. cpu_chart->rd_system = rrddim_add(cpu_chart->st, "system", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  579. cpu_chart->rd_nice = rrddim_add(cpu_chart->st, "nice", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  580. cpu_chart->rd_iowait = rrddim_add(cpu_chart->st, "iowait", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  581. cpu_chart->rd_idle = rrddim_add(cpu_chart->st, "idle", NULL, multiplier, divisor, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  582. rrddim_hide(cpu_chart->st, "idle");
  583. if (core > 0) {
  584. char cpu_core[50 + 1];
  585. snprintfz(cpu_core, 50, "cpu%lu", core - 1);
  586. rrdlabels_add(cpu_chart->st->rrdlabels, "cpu", cpu_core, RRDLABEL_SRC_AUTO);
  587. }
  588. if(unlikely(core == 0 && cpus_var == NULL))
  589. cpus_var = rrdvar_custom_host_variable_add_and_acquire(localhost, "active_processors");
  590. }
  591. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_user, user);
  592. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_nice, nice);
  593. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_system, system);
  594. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_idle, idle);
  595. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_iowait, iowait);
  596. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_irq, irq);
  597. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_softirq, softirq);
  598. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_steal, steal);
  599. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_guest, guest);
  600. rrddim_set_by_pointer(cpu_chart->st, cpu_chart->rd_guest_nice, guest_nice);
  601. rrdset_done(cpu_chart->st);
  602. }
  603. }
  604. else if(unlikely(hash == hash_intr && strcmp(row_key, "intr") == 0)) {
  605. if(likely(do_interrupts)) {
  606. static RRDSET *st_intr = NULL;
  607. static RRDDIM *rd_interrupts = NULL;
  608. unsigned long long value = str2ull(procfile_lineword(ff, l, 1), NULL);
  609. if(unlikely(!st_intr)) {
  610. st_intr = rrdset_create_localhost(
  611. "system"
  612. , "intr"
  613. , NULL
  614. , "interrupts"
  615. , NULL
  616. , "CPU Interrupts"
  617. , "interrupts/s"
  618. , PLUGIN_PROC_NAME
  619. , PLUGIN_PROC_MODULE_STAT_NAME
  620. , NETDATA_CHART_PRIO_SYSTEM_INTR
  621. , update_every
  622. , RRDSET_TYPE_LINE
  623. );
  624. rrdset_flag_set(st_intr, RRDSET_FLAG_DETAIL);
  625. rd_interrupts = rrddim_add(st_intr, "interrupts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  626. }
  627. rrddim_set_by_pointer(st_intr, rd_interrupts, value);
  628. rrdset_done(st_intr);
  629. }
  630. }
  631. else if(unlikely(hash == hash_ctxt && strcmp(row_key, "ctxt") == 0)) {
  632. if(likely(do_context)) {
  633. static RRDSET *st_ctxt = NULL;
  634. static RRDDIM *rd_switches = NULL;
  635. unsigned long long value = str2ull(procfile_lineword(ff, l, 1), NULL);
  636. if(unlikely(!st_ctxt)) {
  637. st_ctxt = rrdset_create_localhost(
  638. "system"
  639. , "ctxt"
  640. , NULL
  641. , "processes"
  642. , NULL
  643. , "CPU Context Switches"
  644. , "context switches/s"
  645. , PLUGIN_PROC_NAME
  646. , PLUGIN_PROC_MODULE_STAT_NAME
  647. , NETDATA_CHART_PRIO_SYSTEM_CTXT
  648. , update_every
  649. , RRDSET_TYPE_LINE
  650. );
  651. rd_switches = rrddim_add(st_ctxt, "switches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  652. }
  653. rrddim_set_by_pointer(st_ctxt, rd_switches, value);
  654. rrdset_done(st_ctxt);
  655. }
  656. }
  657. else if(unlikely(hash == hash_processes && !processes && strcmp(row_key, "processes") == 0)) {
  658. processes = str2ull(procfile_lineword(ff, l, 1), NULL);
  659. }
  660. else if(unlikely(hash == hash_procs_running && !running && strcmp(row_key, "procs_running") == 0)) {
  661. running = str2ull(procfile_lineword(ff, l, 1), NULL);
  662. }
  663. else if(unlikely(hash == hash_procs_blocked && !blocked && strcmp(row_key, "procs_blocked") == 0)) {
  664. blocked = str2ull(procfile_lineword(ff, l, 1), NULL);
  665. }
  666. }
  667. // --------------------------------------------------------------------
  668. if(likely(do_forks)) {
  669. static RRDSET *st_forks = NULL;
  670. static RRDDIM *rd_started = NULL;
  671. if(unlikely(!st_forks)) {
  672. st_forks = rrdset_create_localhost(
  673. "system"
  674. , "forks"
  675. , NULL
  676. , "processes"
  677. , NULL
  678. , "Started Processes"
  679. , "processes/s"
  680. , PLUGIN_PROC_NAME
  681. , PLUGIN_PROC_MODULE_STAT_NAME
  682. , NETDATA_CHART_PRIO_SYSTEM_FORKS
  683. , update_every
  684. , RRDSET_TYPE_LINE
  685. );
  686. rrdset_flag_set(st_forks, RRDSET_FLAG_DETAIL);
  687. rd_started = rrddim_add(st_forks, "started", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  688. }
  689. rrddim_set_by_pointer(st_forks, rd_started, processes);
  690. rrdset_done(st_forks);
  691. }
  692. // --------------------------------------------------------------------
  693. if(likely(do_processes)) {
  694. static RRDSET *st_processes = NULL;
  695. static RRDDIM *rd_running = NULL;
  696. static RRDDIM *rd_blocked = NULL;
  697. if(unlikely(!st_processes)) {
  698. st_processes = rrdset_create_localhost(
  699. "system"
  700. , "processes"
  701. , NULL
  702. , "processes"
  703. , NULL
  704. , "System Processes"
  705. , "processes"
  706. , PLUGIN_PROC_NAME
  707. , PLUGIN_PROC_MODULE_STAT_NAME
  708. , NETDATA_CHART_PRIO_SYSTEM_PROCESSES
  709. , update_every
  710. , RRDSET_TYPE_LINE
  711. );
  712. rd_running = rrddim_add(st_processes, "running", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
  713. rd_blocked = rrddim_add(st_processes, "blocked", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE);
  714. }
  715. rrddim_set_by_pointer(st_processes, rd_running, running);
  716. rrddim_set_by_pointer(st_processes, rd_blocked, blocked);
  717. rrdset_done(st_processes);
  718. }
  719. if(likely(all_cpu_charts_size > 1)) {
  720. if(likely(do_core_throttle_count != CONFIG_BOOLEAN_NO)) {
  721. int r = read_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CORE_THROTTLE_COUNT_INDEX);
  722. if(likely(r != -1 && (do_core_throttle_count == CONFIG_BOOLEAN_YES || r > 0))) {
  723. do_core_throttle_count = CONFIG_BOOLEAN_YES;
  724. static RRDSET *st_core_throttle_count = NULL;
  725. if (unlikely(!st_core_throttle_count)) {
  726. st_core_throttle_count = rrdset_create_localhost(
  727. "cpu"
  728. , "core_throttling"
  729. , NULL
  730. , "throttling"
  731. , "cpu.core_throttling"
  732. , "Core Thermal Throttling Events"
  733. , "events/s"
  734. , PLUGIN_PROC_NAME
  735. , PLUGIN_PROC_MODULE_STAT_NAME
  736. , NETDATA_CHART_PRIO_CORE_THROTTLING
  737. , update_every
  738. , RRDSET_TYPE_LINE
  739. );
  740. }
  741. chart_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CORE_THROTTLE_COUNT_INDEX, st_core_throttle_count, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  742. rrdset_done(st_core_throttle_count);
  743. }
  744. }
  745. if(likely(do_package_throttle_count != CONFIG_BOOLEAN_NO)) {
  746. int r = read_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, PACKAGE_THROTTLE_COUNT_INDEX);
  747. if(likely(r != -1 && (do_package_throttle_count == CONFIG_BOOLEAN_YES || r > 0))) {
  748. do_package_throttle_count = CONFIG_BOOLEAN_YES;
  749. static RRDSET *st_package_throttle_count = NULL;
  750. if(unlikely(!st_package_throttle_count)) {
  751. st_package_throttle_count = rrdset_create_localhost(
  752. "cpu"
  753. , "package_throttling"
  754. , NULL
  755. , "throttling"
  756. , "cpu.package_throttling"
  757. , "Package Thermal Throttling Events"
  758. , "events/s"
  759. , PLUGIN_PROC_NAME
  760. , PLUGIN_PROC_MODULE_STAT_NAME
  761. , NETDATA_CHART_PRIO_PACKAGE_THROTTLING
  762. , update_every
  763. , RRDSET_TYPE_LINE
  764. );
  765. }
  766. chart_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, PACKAGE_THROTTLE_COUNT_INDEX, st_package_throttle_count, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  767. rrdset_done(st_package_throttle_count);
  768. }
  769. }
  770. if(likely(do_cpu_freq != CONFIG_BOOLEAN_NO)) {
  771. char filename[FILENAME_MAX + 1];
  772. int r = 0;
  773. if (accurate_freq_avail) {
  774. r = read_per_core_time_in_state_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CPU_FREQ_INDEX);
  775. if(r > 0 && !accurate_freq_is_used) {
  776. accurate_freq_is_used = 1;
  777. snprintfz(filename, FILENAME_MAX, time_in_state_filename, "cpu*");
  778. collector_info("cpufreq is using %s", filename);
  779. }
  780. }
  781. if (r < 1) {
  782. r = read_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CPU_FREQ_INDEX);
  783. if(accurate_freq_is_used) {
  784. accurate_freq_is_used = 0;
  785. snprintfz(filename, FILENAME_MAX, scaling_cur_freq_filename, "cpu*");
  786. collector_info("cpufreq fell back to %s", filename);
  787. }
  788. }
  789. if(likely(r != -1 && (do_cpu_freq == CONFIG_BOOLEAN_YES || r > 0))) {
  790. do_cpu_freq = CONFIG_BOOLEAN_YES;
  791. static RRDSET *st_scaling_cur_freq = NULL;
  792. if(unlikely(!st_scaling_cur_freq)) {
  793. st_scaling_cur_freq = rrdset_create_localhost(
  794. "cpu"
  795. , "cpufreq"
  796. , NULL
  797. , "cpufreq"
  798. , "cpufreq.cpufreq"
  799. , "Current CPU Frequency"
  800. , "MHz"
  801. , PLUGIN_PROC_NAME
  802. , PLUGIN_PROC_MODULE_STAT_NAME
  803. , NETDATA_CHART_PRIO_CPUFREQ_SCALING_CUR_FREQ
  804. , update_every
  805. , RRDSET_TYPE_LINE
  806. );
  807. }
  808. chart_per_core_files(&all_cpu_charts[1], all_cpu_charts_size - 1, CPU_FREQ_INDEX, st_scaling_cur_freq, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
  809. rrdset_done(st_scaling_cur_freq);
  810. }
  811. }
  812. }
  813. // --------------------------------------------------------------------
  814. static struct per_core_cpuidle_chart *cpuidle_charts = NULL;
  815. size_t schedstat_cores_found = 0;
  816. if(likely(do_cpuidle != CONFIG_BOOLEAN_NO && !read_schedstat(schedstat_filename, &cpuidle_charts, &schedstat_cores_found))) {
  817. int cpu_states_updated = 0;
  818. size_t core, state;
  819. // proc.plugin runs on Linux systems only. Multi-platform compatibility is not needed here,
  820. // so bare pthread functions are used to avoid unneeded overheads.
  821. for(core = 0; core < schedstat_cores_found; core++) {
  822. if(unlikely(!(cpuidle_charts[core].active_time - cpuidle_charts[core].last_active_time))) {
  823. pthread_t thread;
  824. cpu_set_t global_cpu_set;
  825. if (likely(!pthread_getaffinity_np(pthread_self(), sizeof(cpu_set_t), &global_cpu_set))) {
  826. if (unlikely(!CPU_ISSET(core, &global_cpu_set))) {
  827. continue;
  828. }
  829. }
  830. else
  831. collector_error("Cannot read current process affinity");
  832. // These threads are very ephemeral and don't need to have a specific name
  833. if(unlikely(pthread_create(&thread, NULL, wake_cpu_thread, (void *)&core)))
  834. collector_error("Cannot create wake_cpu_thread");
  835. else if(unlikely(pthread_join(thread, NULL)))
  836. collector_error("Cannot join wake_cpu_thread");
  837. cpu_states_updated = 1;
  838. }
  839. }
  840. if(unlikely(!cpu_states_updated || !read_schedstat(schedstat_filename, &cpuidle_charts, &schedstat_cores_found))) {
  841. for(core = 0; core < schedstat_cores_found; core++) {
  842. cpuidle_charts[core].last_active_time = cpuidle_charts[core].active_time;
  843. int r = read_cpuidle_states(cpuidle_name_filename, cpuidle_time_filename, cpuidle_charts, core);
  844. if(likely(r != -1 && (do_cpuidle == CONFIG_BOOLEAN_YES || r > 0))) {
  845. do_cpuidle = CONFIG_BOOLEAN_YES;
  846. char cpuidle_chart_id[RRD_ID_LENGTH_MAX + 1];
  847. snprintfz(cpuidle_chart_id, RRD_ID_LENGTH_MAX, "cpu%zu_cpuidle", core);
  848. if(unlikely(!cpuidle_charts[core].st)) {
  849. cpuidle_charts[core].st = rrdset_create_localhost(
  850. "cpu"
  851. , cpuidle_chart_id
  852. , NULL
  853. , "cpuidle"
  854. , "cpuidle.cpu_cstate_residency_time"
  855. , "C-state residency time"
  856. , "percentage"
  857. , PLUGIN_PROC_NAME
  858. , PLUGIN_PROC_MODULE_STAT_NAME
  859. , NETDATA_CHART_PRIO_CPUIDLE + core
  860. , update_every
  861. , RRDSET_TYPE_STACKED
  862. );
  863. char corebuf[50+1];
  864. snprintfz(corebuf, sizeof(corebuf) - 1, "cpu%zu", core);
  865. rrdlabels_add(cpuidle_charts[core].st->rrdlabels, "cpu", corebuf, RRDLABEL_SRC_AUTO);
  866. char cpuidle_dim_id[RRD_ID_LENGTH_MAX + 1];
  867. cpuidle_charts[core].active_time_rd = rrddim_add(cpuidle_charts[core].st, "active", "C0 (active)", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  868. for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) {
  869. strncpyz(cpuidle_dim_id, cpuidle_charts[core].cpuidle_state[state].name, RRD_ID_LENGTH_MAX);
  870. for(int i = 0; cpuidle_dim_id[i]; i++)
  871. cpuidle_dim_id[i] = tolower(cpuidle_dim_id[i]);
  872. cpuidle_charts[core].cpuidle_state[state].rd = rrddim_add(cpuidle_charts[core].st, cpuidle_dim_id,
  873. cpuidle_charts[core].cpuidle_state[state].name,
  874. 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
  875. }
  876. }
  877. rrddim_set_by_pointer(cpuidle_charts[core].st, cpuidle_charts[core].active_time_rd, cpuidle_charts[core].active_time);
  878. for(state = 0; state < cpuidle_charts[core].cpuidle_state_len; state++) {
  879. rrddim_set_by_pointer(cpuidle_charts[core].st, cpuidle_charts[core].cpuidle_state[state].rd, cpuidle_charts[core].cpuidle_state[state].value);
  880. }
  881. rrdset_done(cpuidle_charts[core].st);
  882. }
  883. }
  884. }
  885. }
  886. if(cpus_var)
  887. rrdvar_custom_host_variable_set(localhost, cpus_var, cores_found);
  888. return 0;
  889. }