// SPDX-License-Identifier: GPL-3.0-or-later

#include "ebpf.h"
#include "ebpf_softirq.h"
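
// Configuration structure for this thread.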
struct config softirq_config = { .first_section = NULL,
                                 .last_section = NULL,
                                 .mutex = NETDATA_MUTEX_INITIALIZER,
                                 .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
                                            .rwlock = AVL_LOCK_INITIALIZER } };

#define SOFTIRQ_MAP_LATENCY 0
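
// eBPF maps used by this thread; `tbl_softirq` stores per-CPU latency for each softirq vector.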
static ebpf_local_maps_t softirq_maps[] = {
    {
        .name = "tbl_softirq",
        .internal_input = NETDATA_SOFTIRQ_MAX_IRQS,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_STATIC,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
#ifdef LIBBPF_MAJOR_VERSION
        .map_type = BPF_MAP_TYPE_PERCPU_ARRAY
#endif
    },
    /* end */
    {
        .name = NULL,
        .internal_input = 0,
        .user_input = 0,
        .type = NETDATA_EBPF_MAP_CONTROLLER,
        .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
#ifdef LIBBPF_MAJOR_VERSION
        .map_type = BPF_MAP_TYPE_PERCPU_ARRAY
#endif
    }
};

#define SOFTIRQ_TP_CLASS_IRQ "irq"
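
// tracepoints attached by this thread (entry/exit pair used to measure softirq latency).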
static ebpf_tracepoint_t softirq_tracepoints[] = {
    {.enabled = false, .class = SOFTIRQ_TP_CLASS_IRQ, .event = "softirq_entry"},
    {.enabled = false, .class = SOFTIRQ_TP_CLASS_IRQ, .event = "softirq_exit"},
    /* end */
    {.enabled = false, .class = NULL, .event = NULL}
};

// these must be in the order defined by the kernel:
// https://elixir.bootlin.com/linux/v5.12.19/source/include/trace/events/irq.h#L13
static softirq_val_t softirq_vals[] = {
    {.name = "HI", .latency = 0},
    {.name = "TIMER", .latency = 0},
    {.name = "NET_TX", .latency = 0},
    {.name = "NET_RX", .latency = 0},
    {.name = "BLOCK", .latency = 0},
    {.name = "IRQ_POLL", .latency = 0},
    {.name = "TASKLET", .latency = 0},
    {.name = "SCHED", .latency = 0},
    {.name = "HRTIMER", .latency = 0},
    {.name = "RCU", .latency = 0},
};

// tmp store for soft IRQ values we get from a per-CPU eBPF map.
static softirq_ebpf_val_t *softirq_ebpf_vals = NULL;
/**
 * Obsolete global
 *
 * Obsolete global charts created by thread.
 *
 * @param em a pointer to `struct ebpf_module`
 */
static void ebpf_obsolete_softirq_global(ebpf_module_t *em)
{
    ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
                              "softirq_latency",
                              "",
                              "Software IRQ latency",
                              EBPF_COMMON_DIMENSION_MILLISECONDS,
                              "softirqs",
                              NETDATA_EBPF_CHART_TYPE_STACKED,
                              NULL,
                              NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS + 1,
                              em->update_every);
}

/**
 * Cleanup
 *
 * Clean up allocated memory.
 *
 * @param ptr thread data.
 */
static void softirq_cleanup(void *ptr)
{
    ebpf_module_t *em = (ebpf_module_t *)ptr;

    if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) {
        pthread_mutex_lock(&lock);
        ebpf_obsolete_softirq_global(em);
        pthread_mutex_unlock(&lock);
        fflush(stdout);
    }

    ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE);

    if (em->objects) {
        ebpf_unload_legacy_code(em->objects, em->probe_links);
        em->objects = NULL;
        em->probe_links = NULL;
    }

    for (int i = 0; softirq_tracepoints[i].class != NULL; i++) {
        ebpf_disable_tracepoint(&softirq_tracepoints[i]);
    }

    freez(softirq_ebpf_vals);
    softirq_ebpf_vals = NULL;

    pthread_mutex_lock(&ebpf_exit_cleanup);
    em->enabled = NETDATA_THREAD_EBPF_STOPPED;
    ebpf_update_stats(&plugin_statistics, em);
    pthread_mutex_unlock(&ebpf_exit_cleanup);
}

/*****************************************************************
 *  MAIN LOOP
 *****************************************************************/

/**
 * Read Latency Map
 *
 * Read data from kernel ring to plot for users.
 *
 * @param maps_per_core do I need to read all cores?
 */
static void softirq_read_latency_map(int maps_per_core)
{
    int fd = softirq_maps[SOFTIRQ_MAP_LATENCY].map_fd;
    int i;
    size_t length = sizeof(softirq_ebpf_val_t);
    if (maps_per_core)
        length *= ebpf_nprocs;

    for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) {
        int test = bpf_map_lookup_elem(fd, &i, softirq_ebpf_vals);
        if (unlikely(test < 0)) {
            continue;
        }

        uint64_t total_latency = 0;
        int cpu_i;
        int end = (maps_per_core) ? ebpf_nprocs : 1;
        for (cpu_i = 0; cpu_i < end; cpu_i++) {
            total_latency += softirq_ebpf_vals[cpu_i].latency / 1000;
        }

        softirq_vals[i].latency = total_latency;
        memset(softirq_ebpf_vals, 0, length);
    }
}
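
/**
 * Create charts
 *
 * Create the global softirq latency chart.
 *
 * @param update_every chart update frequency, in seconds.
 */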
static void softirq_create_charts(int update_every)
{
    ebpf_create_chart(
        NETDATA_EBPF_SYSTEM_GROUP,
        "softirq_latency",
        "Software IRQ latency",
        EBPF_COMMON_DIMENSION_MILLISECONDS,
        "softirqs",
        NULL,
        NETDATA_EBPF_CHART_TYPE_STACKED,
        NETDATA_CHART_PRIO_SYSTEM_SOFTIRQS + 1,
        NULL, NULL, 0, update_every,
        NETDATA_EBPF_MODULE_NAME_SOFTIRQ
    );

    fflush(stdout);
}
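
/**
 * Create dimensions
 *
 * Create one dimension per softirq vector listed in `softirq_vals`.
 */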
static void softirq_create_dims()
{
    uint32_t i;
    for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) {
        ebpf_write_global_dimension(
            softirq_vals[i].name, softirq_vals[i].name,
            ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]
        );
    }
}
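
/**
 * Write dimensions
 *
 * Write the accumulated latency for every softirq vector to the chart.
 */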
static inline void softirq_write_dims()
{
    uint32_t i;
    for (i = 0; i < NETDATA_SOFTIRQ_MAX_IRQS; i++) {
        write_chart_dimension(softirq_vals[i].name, softirq_vals[i].latency);
    }
}

/**
 * Main loop for this collector.
 */
static void softirq_collector(ebpf_module_t *em)
{
    softirq_ebpf_vals = callocz(ebpf_nprocs, sizeof(softirq_ebpf_val_t));

    // create chart and static dims.
    pthread_mutex_lock(&lock);
    softirq_create_charts(em->update_every);
    softirq_create_dims();
    ebpf_update_stats(&plugin_statistics, em);
    ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD);
    pthread_mutex_unlock(&lock);

    // loop and read from published data until ebpf plugin is closed.
    heartbeat_t hb;
    heartbeat_init(&hb);
    int update_every = em->update_every;
    int counter = update_every - 1;
    int maps_per_core = em->maps_per_core;
    // This will be cancelled by its parent
    uint32_t running_time = 0;
    uint32_t lifetime = em->lifetime;
    while (!ebpf_plugin_exit && running_time < lifetime) {
        (void)heartbeat_next(&hb, USEC_PER_SEC);
        if (ebpf_plugin_exit || ++counter != update_every)
            continue;

        counter = 0;
        softirq_read_latency_map(maps_per_core);

        pthread_mutex_lock(&lock);

        // write dims now for all hitherto discovered IRQs.
        ebpf_write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency", "");
        softirq_write_dims();
        ebpf_write_end_chart();

        pthread_mutex_unlock(&lock);

        pthread_mutex_lock(&ebpf_exit_cleanup);
        if (running_time && !em->running_time)
            running_time = update_every;
        else
            running_time += update_every;

        em->running_time = running_time;
        pthread_mutex_unlock(&ebpf_exit_cleanup);
    }
}

/*****************************************************************
 *  EBPF SOFTIRQ THREAD
 *****************************************************************/

/**
 * Soft IRQ latency thread.
 *
 * @param ptr a `ebpf_module_t *`.
 * @return always NULL.
 */
void *ebpf_softirq_thread(void *ptr)
{
    netdata_thread_cleanup_push(softirq_cleanup, ptr);

    ebpf_module_t *em = (ebpf_module_t *)ptr;
    em->maps = softirq_maps;

    if (ebpf_enable_tracepoints(softirq_tracepoints) == 0) {
        goto endsoftirq;
    }

#ifdef LIBBPF_MAJOR_VERSION
    ebpf_define_map_type(em->maps, em->maps_per_core, running_on_kernel);
#endif
    em->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &em->objects);
    if (!em->probe_links) {
        goto endsoftirq;
    }

    softirq_collector(em);

endsoftirq:
    ebpf_update_disabled_plugin_stats(em);

    netdata_thread_cleanup_pop(1);
    return NULL;
}