signals.c 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "common.h"
  // What the main thread does in signals_handle() once a signal has been
  // counted by signal_handler(). Each entry of signals_waiting[] carries one.
  typedef enum signal_action {
      NETDATA_SIGNAL_END_OF_LIST = 0, // sentinel: terminates signals_waiting[]
      NETDATA_SIGNAL_IGNORE,          // installed as SIG_IGN by signals_init()
      NETDATA_SIGNAL_EXIT_CLEANLY,    // clean up and exit(0)
      NETDATA_SIGNAL_SAVE_DATABASE,   // run CMD_SAVE_DATABASE
      NETDATA_SIGNAL_REOPEN_LOGS,     // run CMD_REOPEN_LOGS
      NETDATA_SIGNAL_RELOAD_HEALTH,   // run CMD_RELOAD_HEALTH
      NETDATA_SIGNAL_FATAL,           // report on stderr from the handler, then fatal()
      NETDATA_SIGNAL_CHILD,           // reap exited children
  } SIGNAL_ACTION;
  13. static struct {
  14. int signo; // the signal
  15. const char *name; // the name of the signal
  16. size_t count; // the number of signals received
  17. SIGNAL_ACTION action; // the action to take
  18. } signals_waiting[] = {
  19. { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE },
  20. { SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
  21. { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
  22. { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
  23. { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS },
  24. { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE },
  25. { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH },
  26. { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL },
  27. { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD },
  28. // terminator
  29. { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST }
  30. };
  31. static void signal_handler(int signo) {
  32. // find the entry in the list
  33. int i;
  34. for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) {
  35. if(unlikely(signals_waiting[i].signo == signo)) {
  36. signals_waiting[i].count++;
  37. if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) {
  38. char buffer[200 + 1];
  39. snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
  40. if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) {
  41. // nothing to do - we cannot write but there is no way to complain about it
  42. ;
  43. }
  44. }
  45. return;
  46. }
  47. }
  48. }
  49. void signals_block(void) {
  50. sigset_t sigset;
  51. sigfillset(&sigset);
  52. if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) == -1)
  53. netdata_log_error("SIGNAL: Could not block signals for threads");
  54. }
  55. void signals_unblock(void) {
  56. sigset_t sigset;
  57. sigfillset(&sigset);
  58. if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) {
  59. netdata_log_error("SIGNAL: Could not unblock signals for threads");
  60. }
  61. }
  62. void signals_init(void) {
  63. // Catch signals which we want to use
  64. struct sigaction sa;
  65. sa.sa_flags = 0;
  66. // ignore all signals while we run in a signal handler
  67. sigfillset(&sa.sa_mask);
  68. int i;
  69. for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
  70. switch (signals_waiting[i].action) {
  71. case NETDATA_SIGNAL_IGNORE:
  72. sa.sa_handler = SIG_IGN;
  73. break;
  74. default:
  75. sa.sa_handler = signal_handler;
  76. break;
  77. }
  78. if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
  79. netdata_log_error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name);
  80. }
  81. }
  82. void signals_restore_SIGCHLD(void)
  83. {
  84. struct sigaction sa;
  85. sa.sa_flags = 0;
  86. sigfillset(&sa.sa_mask);
  87. sa.sa_handler = signal_handler;
  88. if(sigaction(SIGCHLD, &sa, NULL) == -1)
  89. netdata_log_error("SIGNAL: Failed to change signal handler for: SIGCHLD");
  90. }
  91. void signals_reset(void) {
  92. struct sigaction sa;
  93. sigemptyset(&sa.sa_mask);
  94. sa.sa_handler = SIG_DFL;
  95. sa.sa_flags = 0;
  96. int i;
  97. for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
  98. if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
  99. netdata_log_error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name);
  100. }
  101. }
  102. // reap_child reaps the child identified by pid.
  103. static void reap_child(pid_t pid) {
  104. siginfo_t i;
  105. errno = 0;
  106. netdata_log_debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid);
  107. if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
  108. if (errno != ECHILD)
  109. netdata_log_error("SIGNAL: waitid(%d): failed to wait for child", pid);
  110. else
  111. netdata_log_info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid);
  112. return;
  113. }
  114. else if (i.si_pid == 0) {
  115. // Process didn't exit, this shouldn't happen.
  116. netdata_log_error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid);
  117. return;
  118. }
  119. switch (i.si_code) {
  120. case CLD_EXITED:
  121. netdata_log_info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status);
  122. break;
  123. case CLD_KILLED:
  124. netdata_log_info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status);
  125. break;
  126. case CLD_DUMPED:
  127. netdata_log_info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status);
  128. break;
  129. case CLD_STOPPED:
  130. netdata_log_info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status);
  131. break;
  132. case CLD_TRAPPED:
  133. netdata_log_info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status);
  134. break;
  135. case CLD_CONTINUED:
  136. netdata_log_info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status);
  137. break;
  138. default:
  139. netdata_log_info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
  140. break;
  141. }
  142. }
  143. // reap_children reaps all pending children which are not managed by myp.
  144. static void reap_children() {
  145. siginfo_t i;
  146. while(1) {
  147. i.si_pid = 0;
  148. if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0)
  149. // nothing to do
  150. return;
  151. reap_child(i.si_pid);
  152. }
  153. }
  154. void signals_handle(void) {
  155. while(1) {
  156. // pause() causes the calling process (or thread) to sleep until a signal
  157. // is delivered that either terminates the process or causes the invocation
  158. // of a signal-catching function.
  159. if(pause() == -1 && errno == EINTR) {
  160. // loop once, but keep looping while signals are coming in
  161. // this is needed because a few operations may take some time
  162. // so we need to check for new signals before pausing again
  163. int found = 1;
  164. while(found) {
  165. found = 0;
  166. // execute the actions of the signals
  167. int i;
  168. for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
  169. if (signals_waiting[i].count) {
  170. found = 1;
  171. signals_waiting[i].count = 0;
  172. const char *name = signals_waiting[i].name;
  173. switch (signals_waiting[i].action) {
  174. case NETDATA_SIGNAL_RELOAD_HEALTH:
  175. error_log_limit_unlimited();
  176. netdata_log_info("SIGNAL: Received %s. Reloading HEALTH configuration...", name);
  177. error_log_limit_reset();
  178. execute_command(CMD_RELOAD_HEALTH, NULL, NULL);
  179. break;
  180. case NETDATA_SIGNAL_SAVE_DATABASE:
  181. error_log_limit_unlimited();
  182. netdata_log_info("SIGNAL: Received %s. Saving databases...", name);
  183. error_log_limit_reset();
  184. execute_command(CMD_SAVE_DATABASE, NULL, NULL);
  185. break;
  186. case NETDATA_SIGNAL_REOPEN_LOGS:
  187. error_log_limit_unlimited();
  188. netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name);
  189. error_log_limit_reset();
  190. execute_command(CMD_REOPEN_LOGS, NULL, NULL);
  191. break;
  192. case NETDATA_SIGNAL_EXIT_CLEANLY:
  193. error_log_limit_unlimited();
  194. netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name);
  195. commands_exit();
  196. netdata_cleanup_and_exit(0);
  197. exit(0);
  198. break;
  199. case NETDATA_SIGNAL_FATAL:
  200. fatal("SIGNAL: Received %s. netdata now exits.", name);
  201. break;
  202. case NETDATA_SIGNAL_CHILD:
  203. reap_children();
  204. break;
  205. default:
  206. netdata_log_info("SIGNAL: Received %s. No signal handler configured. Ignoring it.", name);
  207. break;
  208. }
  209. }
  210. }
  211. }
  212. }
  213. else
  214. netdata_log_error("SIGNAL: pause() returned but it was not interrupted by a signal.");
  215. }
  216. }