cgroup-discovery.c 47 KB


  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "cgroup-internals.h"
  3. // discovery cgroup thread worker jobs
  4. #define WORKER_DISCOVERY_INIT 0
  5. #define WORKER_DISCOVERY_FIND 1
  6. #define WORKER_DISCOVERY_PROCESS 2
  7. #define WORKER_DISCOVERY_PROCESS_RENAME 3
  8. #define WORKER_DISCOVERY_PROCESS_NETWORK 4
  9. #define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5
  10. #define WORKER_DISCOVERY_UPDATE 6
  11. #define WORKER_DISCOVERY_CLEANUP 7
  12. #define WORKER_DISCOVERY_COPY 8
  13. #define WORKER_DISCOVERY_SHARE 9
  14. #define WORKER_DISCOVERY_LOCK 10
  15. #if WORKER_UTILIZATION_MAX_JOB_TYPES < 11
  16. #error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11
  17. #endif
  18. struct cgroup *discovered_cgroup_root = NULL;
  19. char cgroup_chart_id_prefix[] = "cgroup_";
  20. char services_chart_id_prefix[] = "systemd_";
  21. char *cgroups_rename_script = NULL;
  22. // ----------------------------------------------------------------------------
  23. static inline void free_pressure(struct pressure *res) {
  24. if (res->some.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.share_time.st);
  25. if (res->some.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->some.total_time.st);
  26. if (res->full.share_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.share_time.st);
  27. if (res->full.total_time.st) rrdset_is_obsolete___safe_from_collector_thread(res->full.total_time.st);
  28. freez(res->filename);
  29. }
  30. static inline void cgroup_free_network_interfaces(struct cgroup *cg) {
  31. while(cg->interfaces) {
  32. struct cgroup_network_interface *i = cg->interfaces;
  33. cg->interfaces = i->next;
  34. // delete the registration of proc_net_dev rename
  35. netdev_rename_device_del(i->host_device);
  36. freez((void *)i->host_device);
  37. freez((void *)i->container_device);
  38. freez((void *)i);
  39. }
  40. }
  41. static inline void cgroup_free(struct cgroup *cg) {
  42. netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
  43. cgroup_netdev_delete(cg);
  44. if(cg->st_cpu) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu);
  45. if(cg->st_cpu_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit);
  46. if(cg->st_cpu_per_core) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_per_core);
  47. if(cg->st_cpu_nr_throttled) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_nr_throttled);
  48. if(cg->st_cpu_throttled_time) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_throttled_time);
  49. if(cg->st_cpu_shares) rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_shares);
  50. if(cg->st_mem) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem);
  51. if(cg->st_writeback) rrdset_is_obsolete___safe_from_collector_thread(cg->st_writeback);
  52. if(cg->st_mem_activity) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_activity);
  53. if(cg->st_pgfaults) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pgfaults);
  54. if(cg->st_mem_usage) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage);
  55. if(cg->st_mem_usage_limit) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit);
  56. if(cg->st_mem_utilization) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization);
  57. if(cg->st_mem_failcnt) rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_failcnt);
  58. if(cg->st_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_io);
  59. if(cg->st_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_serviced_ops);
  60. if(cg->st_throttle_io) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_io);
  61. if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops);
  62. if(cg->st_queued_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops);
  63. if(cg->st_merged_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops);
  64. if(cg->st_pids) rrdset_is_obsolete___safe_from_collector_thread(cg->st_pids);
  65. freez(cg->filename_cpuset_cpus);
  66. freez(cg->filename_cpu_cfs_period);
  67. freez(cg->filename_cpu_cfs_quota);
  68. freez(cg->filename_memory_limit);
  69. freez(cg->filename_memoryswap_limit);
  70. cgroup_free_network_interfaces(cg);
  71. freez(cg->cpuacct_usage.cpu_percpu);
  72. freez(cg->cpuacct_stat.filename);
  73. freez(cg->cpuacct_usage.filename);
  74. freez(cg->cpuacct_cpu_throttling.filename);
  75. freez(cg->cpuacct_cpu_shares.filename);
  76. arl_free(cg->memory.arl_base);
  77. freez(cg->memory.filename_detailed);
  78. freez(cg->memory.filename_failcnt);
  79. freez(cg->memory.filename_usage_in_bytes);
  80. freez(cg->memory.filename_msw_usage_in_bytes);
  81. freez(cg->io_service_bytes.filename);
  82. freez(cg->io_serviced.filename);
  83. freez(cg->throttle_io_service_bytes.filename);
  84. freez(cg->throttle_io_serviced.filename);
  85. freez(cg->io_merged.filename);
  86. freez(cg->io_queued.filename);
  87. freez(cg->pids.pids_current_filename);
  88. free_pressure(&cg->cpu_pressure);
  89. free_pressure(&cg->io_pressure);
  90. free_pressure(&cg->memory_pressure);
  91. free_pressure(&cg->irq_pressure);
  92. freez(cg->id);
  93. freez(cg->intermediate_id);
  94. freez(cg->chart_id);
  95. freez(cg->name);
  96. rrdlabels_destroy(cg->chart_labels);
  97. freez(cg);
  98. cgroup_root_count--;
  99. }
  100. // ----------------------------------------------------------------------------
  101. // add/remove/find cgroup objects
  102. #define CGROUP_CHARTID_LINE_MAX 1024
  103. static inline char *cgroup_chart_id_strdupz(const char *s) {
  104. if(!s || !*s) s = "/";
  105. if(*s == '/' && s[1] != '\0') s++;
  106. char *r = strdupz(s);
  107. netdata_fix_chart_id(r);
  108. return r;
  109. }
  110. // TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed
  111. static inline void substitute_dots_in_id(char *s) {
  112. // dots are used to distinguish chart type and id in streaming, so we should replace them
  113. for (char *d = s; *d; d++) {
  114. if (*d == '.')
  115. *d = '-';
  116. }
  117. }
  118. // ----------------------------------------------------------------------------
  119. // parse k8s labels
  120. char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data) {
  121. // the first word, up to the first space is the name
  122. char *name = strsep_skip_consecutive_separators(&data, " ");
  123. // the rest are key=value pairs separated by comma
  124. while(data) {
  125. char *pair = strsep_skip_consecutive_separators(&data, ",");
  126. rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S);
  127. }
  128. return name;
  129. }
  130. static inline void discovery_rename_cgroup(struct cgroup *cg) {
  131. if (!cg->pending_renames) {
  132. return;
  133. }
  134. cg->pending_renames--;
  135. netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
  136. netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id);
  137. pid_t cgroup_pid;
  138. FILE *fp_child_input, *fp_child_output;
  139. (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id);
  140. if (!fp_child_output) {
  141. collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id);
  142. cg->pending_renames = 0;
  143. cg->processed = 1;
  144. return;
  145. }
  146. char buffer[CGROUP_CHARTID_LINE_MAX + 1];
  147. char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output);
  148. int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
  149. switch (exit_code) {
  150. case 0:
  151. cg->pending_renames = 0;
  152. break;
  153. case 3:
  154. cg->pending_renames = 0;
  155. cg->processed = 1;
  156. break;
  157. default:
  158. break;
  159. }
  160. if (cg->pending_renames || cg->processed)
  161. return;
  162. if (!new_name || !*new_name || *new_name == '\n')
  163. return;
  164. if (!(new_name = trim(new_name)))
  165. return;
  166. if (!cg->chart_labels)
  167. cg->chart_labels = rrdlabels_create();
  168. // read the new labels and remove the obsolete ones
  169. rrdlabels_unmark_all(cg->chart_labels);
  170. char *name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name);
  171. rrdlabels_remove_all_unmarked(cg->chart_labels);
  172. freez(cg->name);
  173. cg->name = strdupz(name);
  174. freez(cg->chart_id);
  175. cg->chart_id = cgroup_chart_id_strdupz(name);
  176. substitute_dots_in_id(cg->chart_id);
  177. cg->hash_chart_id = simple_hash(cg->chart_id);
  178. }
  179. static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) {
  180. struct stat buf;
  181. snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id);
  182. if (likely(stat(out->path, &buf) == 0)) {
  183. return;
  184. }
  185. snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id);
  186. if (likely(stat(out->path, &buf) == 0)) {
  187. return;
  188. }
  189. snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id);
  190. if (likely(stat(out->path, &buf) == 0)) {
  191. return;
  192. }
  193. snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id);
  194. if (likely(stat(out->path, &buf) == 0)) {
  195. return;
  196. }
  197. out->path[0] = '\0';
  198. out->enabled = 0;
  199. }
  200. static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) {
  201. char buffer[CGROUP_CHARTID_LINE_MAX + 1];
  202. cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE;
  203. strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX);
  204. char *s = buffer;
  205. // skip to the last slash
  206. size_t len = strlen(s);
  207. while (len--) {
  208. if (unlikely(s[len] == '/')) {
  209. break;
  210. }
  211. }
  212. if (len) {
  213. s = &s[len + 1];
  214. }
  215. // remove extension
  216. len = strlen(s);
  217. while (len--) {
  218. if (unlikely(s[len] == '.')) {
  219. break;
  220. }
  221. }
  222. if (len) {
  223. s[len] = '\0';
  224. }
  225. freez(cg->name);
  226. cg->name = strdupz(s);
  227. freez(cg->chart_id);
  228. cg->chart_id = cgroup_chart_id_strdupz(s);
  229. substitute_dots_in_id(cg->chart_id);
  230. cg->hash_chart_id = simple_hash(cg->chart_id);
  231. }
  232. static inline struct cgroup *discovery_cgroup_add(const char *id) {
  233. netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id);
  234. struct cgroup *cg = callocz(1, sizeof(struct cgroup));
  235. cg->id = strdupz(id);
  236. cg->hash = simple_hash(cg->id);
  237. cg->name = strdupz(id);
  238. cg->intermediate_id = cgroup_chart_id_strdupz(id);
  239. cg->chart_id = cgroup_chart_id_strdupz(id);
  240. substitute_dots_in_id(cg->chart_id);
  241. cg->hash_chart_id = simple_hash(cg->chart_id);
  242. if (cgroup_use_unified_cgroups) {
  243. cg->options |= CGROUP_OPTIONS_IS_UNIFIED;
  244. }
  245. if (!discovered_cgroup_root)
  246. discovered_cgroup_root = cg;
  247. else {
  248. struct cgroup *t;
  249. for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) {
  250. }
  251. t->discovered_next = cg;
  252. }
  253. return cg;
  254. }
  255. static inline struct cgroup *discovery_cgroup_find(const char *id) {
  256. netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id);
  257. uint32_t hash = simple_hash(id);
  258. struct cgroup *cg;
  259. for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
  260. if(hash == cg->hash && strcmp(id, cg->id) == 0)
  261. break;
  262. }
  263. netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found");
  264. return cg;
  265. }
  266. static int calc_cgroup_depth(const char *id) {
  267. int depth = 0;
  268. const char *s;
  269. for (s = id; *s; s++) {
  270. depth += unlikely(*s == '/');
  271. }
  272. return depth;
  273. }
  274. static inline void discovery_find_cgroup_in_dir_callback(const char *dir) {
  275. if (!dir || !*dir) {
  276. dir = "/";
  277. }
  278. netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", dir);
  279. struct cgroup *cg = discovery_cgroup_find(dir);
  280. if (cg) {
  281. cg->available = 1;
  282. return;
  283. }
  284. if (cgroup_root_count >= cgroup_root_max) {
  285. nd_log_limit_static_global_var(erl, 3600, 0);
  286. nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING, "CGROUP: maximum number of cgroups reached (%d). No more cgroups will be added.", cgroup_root_count);
  287. return;
  288. }
  289. if (cgroup_max_depth > 0) {
  290. int depth = calc_cgroup_depth(dir);
  291. if (depth > cgroup_max_depth) {
  292. nd_log_collector(NDLP_DEBUG, "CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth);
  293. return;
  294. }
  295. }
  296. cg = discovery_cgroup_add(dir);
  297. cg->available = 1;
  298. cg->first_time_seen = 1;
  299. cg->function_ready = false;
  300. cgroup_root_count++;
  301. }
  302. static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) {
  303. if(!this) this = base;
  304. netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base);
  305. size_t dirlen = strlen(this), baselen = strlen(base);
  306. int ret = -1;
  307. int enabled = -1;
  308. const char *relative_path = &this[baselen];
  309. if(!*relative_path) relative_path = "/";
  310. DIR *dir = opendir(this);
  311. if(!dir) {
  312. collector_error("CGROUP: cannot read directory '%s'", base);
  313. return ret;
  314. }
  315. ret = 1;
  316. callback(relative_path);
  317. struct dirent *de = NULL;
  318. while((de = readdir(dir))) {
  319. if(de->d_type == DT_DIR
  320. && (
  321. (de->d_name[0] == '.' && de->d_name[1] == '\0')
  322. || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')
  323. ))
  324. continue;
  325. if(de->d_type == DT_DIR) {
  326. if(enabled == -1) {
  327. const char *r = relative_path;
  328. if(*r == '\0') r = "/";
  329. // do not decent in directories we are not interested
  330. enabled = matches_search_cgroup_paths(r);
  331. }
  332. if(enabled) {
  333. char *s = mallocz(dirlen + strlen(de->d_name) + 2);
  334. strcpy(s, this);
  335. strcat(s, "/");
  336. strcat(s, de->d_name);
  337. int ret2 = discovery_find_dir_in_subdirs(base, s, callback);
  338. if(ret2 > 0) ret += ret2;
  339. freez(s);
  340. }
  341. }
  342. }
  343. closedir(dir);
  344. return ret;
  345. }
  346. static inline void discovery_mark_as_unavailable_all_cgroups() {
  347. for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
  348. cg->available = 0;
  349. }
  350. }
  351. static inline void discovery_update_filenames_cgroup_v1(struct cgroup *cg) {
  352. char filename[FILENAME_MAX + 1];
  353. struct stat buf;
  354. // CPU
  355. if (unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) {
  356. snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id);
  357. if (likely(stat(filename, &buf) != -1)) {
  358. cg->cpuacct_stat.filename = strdupz(filename);
  359. cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
  360. snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id);
  361. cg->filename_cpuset_cpus = strdupz(filename);
  362. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id);
  363. cg->filename_cpu_cfs_period = strdupz(filename);
  364. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id);
  365. cg->filename_cpu_cfs_quota = strdupz(filename);
  366. }
  367. }
  368. // FIXME: remove usage_percpu
  369. if (unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) {
  370. snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id);
  371. if (likely(stat(filename, &buf) != -1)) {
  372. cg->cpuacct_usage.filename = strdupz(filename);
  373. cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage;
  374. }
  375. }
  376. if (unlikely(
  377. cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename &&
  378. !is_cgroup_systemd_service(cg))) {
  379. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id);
  380. if (likely(stat(filename, &buf) != -1)) {
  381. cg->cpuacct_cpu_throttling.filename = strdupz(filename);
  382. cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling;
  383. }
  384. }
  385. if (unlikely(
  386. cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && !is_cgroup_systemd_service(cg))) {
  387. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id);
  388. if (likely(stat(filename, &buf) != -1)) {
  389. cg->cpuacct_cpu_shares.filename = strdupz(filename);
  390. cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares;
  391. }
  392. }
  393. // Memory
  394. if (unlikely(
  395. (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed &&
  396. (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) {
  397. snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id);
  398. if (likely(stat(filename, &buf) != -1)) {
  399. cg->memory.filename_detailed = strdupz(filename);
  400. cg->memory.enabled_detailed =
  401. (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO;
  402. }
  403. }
  404. if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) {
  405. snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id);
  406. if (likely(stat(filename, &buf) != -1)) {
  407. cg->memory.filename_usage_in_bytes = strdupz(filename);
  408. cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
  409. snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id);
  410. cg->filename_memory_limit = strdupz(filename);
  411. }
  412. }
  413. if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
  414. snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id);
  415. if (likely(stat(filename, &buf) != -1)) {
  416. cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
  417. cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
  418. snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id);
  419. cg->filename_memoryswap_limit = strdupz(filename);
  420. }
  421. }
  422. if (unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) {
  423. snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id);
  424. if (likely(stat(filename, &buf) != -1)) {
  425. cg->memory.filename_failcnt = strdupz(filename);
  426. cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt;
  427. }
  428. }
  429. // Blkio
  430. if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) {
  431. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id);
  432. if (unlikely(stat(filename, &buf) != -1)) {
  433. cg->io_service_bytes.filename = strdupz(filename);
  434. cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
  435. } else {
  436. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id);
  437. if (likely(stat(filename, &buf) != -1)) {
  438. cg->io_service_bytes.filename = strdupz(filename);
  439. cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
  440. }
  441. }
  442. }
  443. if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) {
  444. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id);
  445. if (unlikely(stat(filename, &buf) != -1)) {
  446. cg->io_serviced.filename = strdupz(filename);
  447. cg->io_serviced.enabled = cgroup_enable_blkio_ops;
  448. } else {
  449. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id);
  450. if (likely(stat(filename, &buf) != -1)) {
  451. cg->io_serviced.filename = strdupz(filename);
  452. cg->io_serviced.enabled = cgroup_enable_blkio_ops;
  453. }
  454. }
  455. }
  456. if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) {
  457. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id);
  458. if (unlikely(stat(filename, &buf) != -1)) {
  459. cg->throttle_io_service_bytes.filename = strdupz(filename);
  460. cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io;
  461. } else {
  462. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id);
  463. if (likely(stat(filename, &buf) != -1)) {
  464. cg->throttle_io_service_bytes.filename = strdupz(filename);
  465. cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io;
  466. }
  467. }
  468. }
  469. if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) {
  470. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id);
  471. if (unlikely(stat(filename, &buf) != -1)) {
  472. cg->throttle_io_serviced.filename = strdupz(filename);
  473. cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops;
  474. } else {
  475. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id);
  476. if (likely(stat(filename, &buf) != -1)) {
  477. cg->throttle_io_serviced.filename = strdupz(filename);
  478. cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops;
  479. }
  480. }
  481. }
  482. if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) {
  483. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id);
  484. if (unlikely(stat(filename, &buf) != -1)) {
  485. cg->io_merged.filename = strdupz(filename);
  486. cg->io_merged.enabled = cgroup_enable_blkio_merged_ops;
  487. } else {
  488. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id);
  489. if (likely(stat(filename, &buf) != -1)) {
  490. cg->io_merged.filename = strdupz(filename);
  491. cg->io_merged.enabled = cgroup_enable_blkio_merged_ops;
  492. }
  493. }
  494. }
  495. if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) {
  496. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id);
  497. if (unlikely(stat(filename, &buf) != -1)) {
  498. cg->io_queued.filename = strdupz(filename);
  499. cg->io_queued.enabled = cgroup_enable_blkio_queued_ops;
  500. } else {
  501. snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id);
  502. if (likely(stat(filename, &buf) != -1)) {
  503. cg->io_queued.filename = strdupz(filename);
  504. cg->io_queued.enabled = cgroup_enable_blkio_queued_ops;
  505. }
  506. }
  507. }
  508. // Pids
  509. if (unlikely(!cg->pids.pids_current_filename)) {
  510. snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_pids_base, cg->id);
  511. if (likely(stat(filename, &buf) != -1)) {
  512. cg->pids.pids_current_filename = strdupz(filename);
  513. }
  514. }
  515. }
  516. static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) {
  517. char filename[FILENAME_MAX + 1];
  518. struct stat buf;
  519. // CPU
  520. if (unlikely((cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && !cg->cpuacct_stat.filename)) {
  521. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id);
  522. if (likely(stat(filename, &buf) != -1)) {
  523. cg->cpuacct_stat.filename = strdupz(filename);
  524. cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
  525. cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling;
  526. cg->filename_cpuset_cpus = NULL;
  527. cg->filename_cpu_cfs_period = NULL;
  528. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id);
  529. cg->filename_cpu_cfs_quota = strdupz(filename);
  530. }
  531. }
  532. if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) {
  533. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id);
  534. if (likely(stat(filename, &buf) != -1)) {
  535. cg->cpuacct_cpu_shares.filename = strdupz(filename);
  536. cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares;
  537. }
  538. }
  539. // Memory
  540. // FIXME: this if condition!
  541. if (unlikely(
  542. (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed &&
  543. (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) {
  544. snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id);
  545. if (likely(stat(filename, &buf) != -1)) {
  546. cg->memory.filename_detailed = strdupz(filename);
  547. cg->memory.enabled_detailed =
  548. (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO;
  549. }
  550. }
  551. if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) {
  552. snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id);
  553. if (likely(stat(filename, &buf) != -1)) {
  554. cg->memory.filename_usage_in_bytes = strdupz(filename);
  555. cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
  556. snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id);
  557. cg->filename_memory_limit = strdupz(filename);
  558. }
  559. }
  560. if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
  561. snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id);
  562. if (likely(stat(filename, &buf) != -1)) {
  563. cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
  564. cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
  565. snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id);
  566. cg->filename_memoryswap_limit = strdupz(filename);
  567. }
  568. }
  569. // Blkio
  570. if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) {
  571. snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id);
  572. if (likely(stat(filename, &buf) != -1)) {
  573. cg->io_service_bytes.filename = strdupz(filename);
  574. cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
  575. }
  576. }
  577. if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) {
  578. snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id);
  579. if (likely(stat(filename, &buf) != -1)) {
  580. cg->io_serviced.filename = strdupz(filename);
  581. cg->io_serviced.enabled = cgroup_enable_blkio_ops;
  582. }
  583. }
  584. // PSI
  585. if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) {
  586. snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id);
  587. if (likely(stat(filename, &buf) != -1)) {
  588. cg->cpu_pressure.filename = strdupz(filename);
  589. cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu;
  590. cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO;
  591. }
  592. }
  593. if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) {
  594. snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id);
  595. if (likely(stat(filename, &buf) != -1)) {
  596. cg->io_pressure.filename = strdupz(filename);
  597. cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some;
  598. cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full;
  599. }
  600. }
  601. if (unlikely(
  602. (cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) &&
  603. !cg->memory_pressure.filename)) {
  604. snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id);
  605. if (likely(stat(filename, &buf) != -1)) {
  606. cg->memory_pressure.filename = strdupz(filename);
  607. cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some;
  608. cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full;
  609. }
  610. }
  611. if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
  612. snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
  613. if (likely(stat(filename, &buf) != -1)) {
  614. cg->irq_pressure.filename = strdupz(filename);
  615. cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
  616. cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
  617. }
  618. }
  619. // Pids
  620. if (unlikely(!cg->pids.pids_current_filename)) {
  621. snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_unified_base, cg->id);
  622. if (likely(stat(filename, &buf) != -1)) {
  623. cg->pids.pids_current_filename = strdupz(filename);
  624. }
  625. }
  626. }
  627. static inline void discovery_update_filenames_all_cgroups() {
  628. for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
  629. if (unlikely(!cg->available || !cg->enabled || cg->pending_renames))
  630. continue;
  631. if (!cgroup_use_unified_cgroups)
  632. discovery_update_filenames_cgroup_v1(cg);
  633. else if (likely(cgroup_unified_exist))
  634. discovery_update_filenames_cgroup_v2(cg);
  635. }
  636. }
  637. static inline void discovery_cleanup_all_cgroups() {
  638. struct cgroup *cg = discovered_cgroup_root, *last = NULL;
  639. for(; cg ;) {
  640. if(!cg->available) {
  641. // enable the first duplicate cgroup
  642. {
  643. struct cgroup *t;
  644. for (t = discovered_cgroup_root; t; t = t->discovered_next) {
  645. if (t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE &&
  646. (is_cgroup_systemd_service(t) == is_cgroup_systemd_service(cg)) &&
  647. t->hash_chart_id == cg->hash_chart_id && !strcmp(t->chart_id, cg->chart_id)) {
  648. netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id);
  649. t->enabled = 1;
  650. t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE;
  651. break;
  652. }
  653. }
  654. }
  655. if(!last)
  656. discovered_cgroup_root = cg->discovered_next;
  657. else
  658. last->discovered_next = cg->discovered_next;
  659. cgroup_free(cg);
  660. if(!last)
  661. cg = discovered_cgroup_root;
  662. else
  663. cg = last->discovered_next;
  664. }
  665. else {
  666. last = cg;
  667. cg = cg->discovered_next;
  668. }
  669. }
  670. }
  671. static inline void discovery_copy_discovered_cgroups_to_reader() {
  672. netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list");
  673. struct cgroup *cg;
  674. for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
  675. cg->next = cg->discovered_next;
  676. }
  677. cgroup_root = discovered_cgroup_root;
  678. }
  679. static inline void discovery_share_cgroups_with_ebpf() {
  680. struct cgroup *cg;
  681. int count;
  682. struct stat buf;
  683. if (shm_mutex_cgroup_ebpf == SEM_FAILED) {
  684. return;
  685. }
  686. sem_wait(shm_mutex_cgroup_ebpf);
  687. for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) {
  688. netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count];
  689. char *prefix = (is_cgroup_systemd_service(cg)) ? services_chart_id_prefix : cgroup_chart_id_prefix;
  690. snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_id);
  691. ptr->hash = simple_hash(ptr->name);
  692. ptr->options = cg->options;
  693. ptr->enabled = cg->enabled;
  694. if (cgroup_use_unified_cgroups) {
  695. snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id);
  696. if (likely(stat(ptr->path, &buf) == -1)) {
  697. ptr->path[0] = '\0';
  698. ptr->enabled = 0;
  699. }
  700. } else {
  701. is_cgroup_procs_exist(ptr, cg->id);
  702. }
  703. netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled);
  704. }
  705. shm_cgroup_ebpf.header->cgroup_root_count = count;
  706. sem_post(shm_mutex_cgroup_ebpf);
  707. }
  708. static inline void discovery_find_all_cgroups_v1() {
  709. if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) {
  710. if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
  711. cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
  712. collector_error("CGROUP: disabled cpu statistics.");
  713. }
  714. }
  715. if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io ||
  716. cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) {
  717. if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
  718. cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io =
  719. cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops =
  720. CONFIG_BOOLEAN_NO;
  721. collector_error("CGROUP: disabled blkio statistics.");
  722. }
  723. }
  724. if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) {
  725. if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
  726. cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt =
  727. CONFIG_BOOLEAN_NO;
  728. collector_error("CGROUP: disabled memory statistics.");
  729. }
  730. }
  731. if (cgroup_search_in_devices) {
  732. if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
  733. cgroup_search_in_devices = 0;
  734. collector_error("CGROUP: disabled devices statistics.");
  735. }
  736. }
  737. }
  738. static inline void discovery_find_all_cgroups_v2() {
  739. if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
  740. cgroup_unified_exist = CONFIG_BOOLEAN_NO;
  741. collector_error("CGROUP: disabled unified cgroups statistics.");
  742. }
  743. }
  // Tell whether a string consists exclusively of decimal digits.
  //
  // @param s  NUL-terminated string to inspect (NULL is tolerated)
  // @return   1 when s is non-empty and every character is '0'..'9', 0 otherwise
  //           (an empty or NULL string yields 0)
  static int is_digits_only(const char *s) {
      if (!s || !*s)
          return 0;

      for (; *s; s++) {
          // <ctype.h> functions take an int that must be representable as
          // unsigned char (or EOF); a plain char may be negative for bytes >= 0x80
          if (!isdigit((unsigned char)*s))
              return 0;
      }

      return 1;
  }
  // Heuristic check of whether a cgroup id looks like a container inside a
  // Kubernetes pod.
  //
  // The id must contain the substring "pod" at least twice, and the last
  // occurrence must be followed (somewhere later) by a '/' that itself is
  // followed by at least one more character.
  //
  // examples:
  // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147
  //
  // @param id  cgroup id (path) to inspect
  // @return    1 when the id matches the pattern above, 0 otherwise
  static int is_cgroup_k8s_container(const char *id) {
      static const char marker[] = "pod";
      const size_t marker_len = sizeof(marker) - 1;

      int occurrences = 0;
      const char *after_last = NULL;

      // count non-overlapping occurrences, remembering where the last one ends
      for (const char *hit = strstr(id, marker); hit; hit = strstr(hit + marker_len, marker)) {
          occurrences++;
          after_last = hit + marker_len;
      }

      if (occurrences < 2 || !after_last)
          return 0;

      const char *slash = strchr(after_last, '/');
      if (!slash)
          return 0;

      // something has to follow the slash
      return slash[1] != '\0';
  }
  766. #define TASK_COMM_LEN 16
  767. static int k8s_get_container_first_proc_comm(const char *id, char *comm) {
  768. if (!is_cgroup_k8s_container(id)) {
  769. return 1;
  770. }
  771. static procfile *ff = NULL;
  772. char filename[FILENAME_MAX + 1];
  773. snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id);
  774. ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG);
  775. if (unlikely(!ff)) {
  776. netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename);
  777. return 1;
  778. }
  779. ff = procfile_readall(ff);
  780. if (unlikely(!ff)) {
  781. netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename);
  782. return 1;
  783. }
  784. unsigned long lines = procfile_lines(ff);
  785. if (likely(lines < 2)) {
  786. return 1;
  787. }
  788. char *pid = procfile_lineword(ff, 0, 0);
  789. if (!pid || !*pid) {
  790. return 1;
  791. }
  792. snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid);
  793. ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
  794. if (unlikely(!ff)) {
  795. netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename);
  796. return 1;
  797. }
  798. ff = procfile_readall(ff);
  799. if (unlikely(!ff)) {
  800. netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename);
  801. return 1;
  802. }
  803. lines = procfile_lines(ff);
  804. if (unlikely(lines != 2)) {
  805. return 1;
  806. }
  807. char *proc_comm = procfile_lineword(ff, 0, 0);
  808. if (!proc_comm || !*proc_comm) {
  809. return 1;
  810. }
  811. strncpyz(comm, proc_comm, TASK_COMM_LEN);
  812. return 0;
  813. }
  814. static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
  815. if (!cg->first_time_seen) {
  816. return;
  817. }
  818. cg->first_time_seen = 0;
  819. char comm[TASK_COMM_LEN + 1];
  820. if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) {
  821. if (strstr(cg->id, "kubepods")) {
  822. cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S;
  823. } else {
  824. cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN;
  825. }
  826. }
  827. if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) {
  828. // container initialization may take some time when CPU % is high
  829. // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number)
  830. if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) {
  831. cg->first_time_seen = 1;
  832. return;
  833. }
  834. if (!strcmp(comm, "pause")) {
  835. // a container that holds the network namespace for the pod
  836. // we don't need to collect its metrics
  837. cg->processed = 1;
  838. return;
  839. }
  840. }
  841. if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) {
  842. netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_id);
  843. convert_cgroup_to_systemd_service(cg);
  844. return;
  845. }
  846. if (matches_enabled_cgroup_renames(cg->id)) {
  847. netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_id);
  848. if (is_inside_k8s && is_cgroup_k8s_container(cg->id)) {
  849. // it may take up to a minute for the K8s API to return data for the container
  850. // tested on AWS K8s cluster with 100% CPU utilization
  851. cg->pending_renames = 9; // 1.5 minute
  852. } else {
  853. cg->pending_renames = 2;
  854. }
  855. }
  856. }
  857. static int discovery_is_cgroup_duplicate(struct cgroup *cg) {
  858. // https://github.com/netdata/netdata/issues/797#issuecomment-241248884
  859. struct cgroup *c;
  860. for (c = discovered_cgroup_root; c; c = c->discovered_next) {
  861. if (c != cg && c->enabled && (is_cgroup_systemd_service(c) == is_cgroup_systemd_service(cg)) &&
  862. c->hash_chart_id == cg->hash_chart_id && !strcmp(c->chart_id, cg->chart_id)) {
  863. collector_error(
  864. "CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.",
  865. cg->chart_id,
  866. c->id,
  867. cg->id);
  868. return 1;
  869. }
  870. }
  871. return 0;
  872. }
  873. // ----------------------------------------------------------------------------
  874. // cgroup network interfaces
  875. #define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048
  876. static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
  877. netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
  878. pid_t cgroup_pid;
  879. char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
  880. if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
  881. snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id);
  882. }
  883. else {
  884. snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id);
  885. }
  886. netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id);
  887. FILE *fp_child_input, *fp_child_output;
  888. (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier);
  889. if(!fp_child_output) {
  890. collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier);
  891. return;
  892. }
  893. char *s;
  894. char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
  895. while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) {
  896. trim(s);
  897. if(*s && *s != '\n') {
  898. char *t = s;
  899. while(*t && *t != ' ') t++;
  900. if(*t == ' ') {
  901. *t = '\0';
  902. t++;
  903. }
  904. if(!*s) {
  905. collector_error("CGROUP: empty host interface returned by script");
  906. continue;
  907. }
  908. if(!*t) {
  909. collector_error("CGROUP: empty guest interface returned by script");
  910. continue;
  911. }
  912. struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface));
  913. i->host_device = strdupz(s);
  914. i->container_device = strdupz(t);
  915. i->next = cg->interfaces;
  916. cg->interfaces = i;
  917. collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device);
  918. // register a device rename to proc_net_dev.c
  919. netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels,
  920. k8s_is_kubepod(cg) ? "k8s." : "", cgroup_netdev_get(cg));
  921. }
  922. }
  923. netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
  924. // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id);
  925. }
  926. static inline void discovery_process_cgroup(struct cgroup *cg) {
  927. if (!cg->available || cg->processed) {
  928. return;
  929. }
  930. if (cg->first_time_seen) {
  931. worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME);
  932. discovery_process_first_time_seen_cgroup(cg);
  933. if (unlikely(cg->first_time_seen || cg->processed)) {
  934. return;
  935. }
  936. }
  937. if (cg->pending_renames) {
  938. worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME);
  939. discovery_rename_cgroup(cg);
  940. if (unlikely(cg->pending_renames || cg->processed)) {
  941. return;
  942. }
  943. }
  944. cg->processed = 1;
  945. if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) {
  946. collector_info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id);
  947. return;
  948. }
  949. if (is_cgroup_systemd_service(cg)) {
  950. if (discovery_is_cgroup_duplicate(cg)) {
  951. cg->enabled = 0;
  952. cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
  953. return;
  954. }
  955. if (!cg->chart_labels)
  956. cg->chart_labels = rrdlabels_create();
  957. rrdlabels_add(cg->chart_labels, "service_name", cg->name, RRDLABEL_SRC_AUTO);
  958. cg->enabled = 1;
  959. return;
  960. }
  961. if (!(cg->enabled = matches_enabled_cgroup_names(cg->name))) {
  962. netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->name);
  963. return;
  964. }
  965. if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) {
  966. netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->name);
  967. return;
  968. }
  969. if (discovery_is_cgroup_duplicate(cg)) {
  970. cg->enabled = 0;
  971. cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
  972. return;
  973. }
  974. if (!cg->chart_labels)
  975. cg->chart_labels = rrdlabels_create();
  976. if (!k8s_is_kubepod(cg)) {
  977. rrdlabels_add(cg->chart_labels, "cgroup_name", cg->name, RRDLABEL_SRC_AUTO);
  978. if (!rrdlabels_exist(cg->chart_labels, "image"))
  979. rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO);
  980. }
  981. worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK);
  982. read_cgroup_network_interfaces(cg);
  983. }
  984. static inline void discovery_find_all_cgroups() {
  985. netdata_log_debug(D_CGROUP, "searching for cgroups");
  986. worker_is_busy(WORKER_DISCOVERY_INIT);
  987. discovery_mark_as_unavailable_all_cgroups();
  988. worker_is_busy(WORKER_DISCOVERY_FIND);
  989. if (!cgroup_use_unified_cgroups) {
  990. discovery_find_all_cgroups_v1();
  991. } else {
  992. discovery_find_all_cgroups_v2();
  993. }
  994. for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
  995. worker_is_busy(WORKER_DISCOVERY_PROCESS);
  996. discovery_process_cgroup(cg);
  997. }
  998. worker_is_busy(WORKER_DISCOVERY_UPDATE);
  999. discovery_update_filenames_all_cgroups();
  1000. worker_is_busy(WORKER_DISCOVERY_LOCK);
  1001. uv_mutex_lock(&cgroup_root_mutex);
  1002. worker_is_busy(WORKER_DISCOVERY_CLEANUP);
  1003. discovery_cleanup_all_cgroups();
  1004. worker_is_busy(WORKER_DISCOVERY_COPY);
  1005. discovery_copy_discovered_cgroups_to_reader();
  1006. uv_mutex_unlock(&cgroup_root_mutex);
  1007. worker_is_busy(WORKER_DISCOVERY_SHARE);
  1008. discovery_share_cgroups_with_ebpf();
  1009. netdata_log_debug(D_CGROUP, "done searching for cgroups");
  1010. }
  1011. void cgroup_discovery_worker(void *ptr)
  1012. {
  1013. UNUSED(ptr);
  1014. worker_register("CGROUPSDISC");
  1015. worker_register_job_name(WORKER_DISCOVERY_INIT, "init");
  1016. worker_register_job_name(WORKER_DISCOVERY_FIND, "find");
  1017. worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process");
  1018. worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename");
  1019. worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network");
  1020. worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new");
  1021. worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update");
  1022. worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup");
  1023. worker_register_job_name(WORKER_DISCOVERY_COPY, "copy");
  1024. worker_register_job_name(WORKER_DISCOVERY_SHARE, "share");
  1025. worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock");
  1026. entrypoint_parent_process_comm = simple_pattern_create(
  1027. " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3)
  1028. " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961
  1029. NULL,
  1030. SIMPLE_PATTERN_EXACT, true);
  1031. service_register(SERVICE_THREAD_TYPE_LIBUV, NULL, NULL, NULL, false);
  1032. while (service_running(SERVICE_COLLECTORS)) {
  1033. worker_is_idle();
  1034. uv_mutex_lock(&discovery_thread.mutex);
  1035. uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex);
  1036. uv_mutex_unlock(&discovery_thread.mutex);
  1037. if (unlikely(!service_running(SERVICE_COLLECTORS)))
  1038. break;
  1039. discovery_find_all_cgroups();
  1040. }
  1041. collector_info("discovery thread stopped");
  1042. worker_unregister();
  1043. service_exits();
  1044. __atomic_store_n(&discovery_thread.exited,1,__ATOMIC_RELAXED);
  1045. }