123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285 |
- // SPDX-License-Identifier: GPL-3.0-or-later
- #include "common.h"
// Set to 1 by signals_init() when the process runs as pid 1 and the child
// reaper has been initialized; 0 otherwise.
static int reaper_enabled = 0;

// The action associated with each entry of the signal table.
typedef enum signal_action {
    NETDATA_SIGNAL_END_OF_LIST = 0, // marks the terminating entry of the table
    NETDATA_SIGNAL_IGNORE,          // the signal is set to SIG_IGN
    NETDATA_SIGNAL_EXIT_CLEANLY,    // clean up and exit the process
    NETDATA_SIGNAL_SAVE_DATABASE,   // run the "save database" command
    NETDATA_SIGNAL_REOPEN_LOGS,     // run the "reopen logs" command
    NETDATA_SIGNAL_RELOAD_HEALTH,   // run the "reload health" command
    NETDATA_SIGNAL_FATAL,           // report on stderr, then call fatal()
    NETDATA_SIGNAL_CHILD,           // reap exited children (reaper mode only)
} SIGNAL_ACTION;
- static struct {
- int signo; // the signal
- const char *name; // the name of the signal
- size_t count; // the number of signals received
- SIGNAL_ACTION action; // the action to take
- } signals_waiting[] = {
- { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE },
- { SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
- { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
- { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY },
- { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS },
- { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE },
- { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH },
- { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL },
- { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD },
- // terminator
- { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST }
- };
- static void signal_handler(int signo) {
- // find the entry in the list
- int i;
- for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) {
- if(unlikely(signals_waiting[i].signo == signo)) {
- signals_waiting[i].count++;
- if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) {
- char buffer[200 + 1];
- snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
- if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) {
- // nothing to do - we cannot write but there is no way to complain about it
- ;
- }
- }
- return;
- }
- }
- }
- void signals_block(void) {
- sigset_t sigset;
- sigfillset(&sigset);
- if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) == -1)
- error("SIGNAL: Could not block signals for threads");
- }
- void signals_unblock(void) {
- sigset_t sigset;
- sigfillset(&sigset);
- if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) {
- error("SIGNAL: Could not unblock signals for threads");
- }
- }
- void signals_init(void) {
- // Catch signals which we want to use
- struct sigaction sa;
- sa.sa_flags = 0;
- // Enable process tracking / reaper if running as init (pid == 1).
- // This prevents zombie processes when running in a container.
- if (getpid() == 1) {
- info("SIGNAL: Enabling reaper");
- myp_init();
- reaper_enabled = 1;
- } else {
- info("SIGNAL: Not enabling reaper");
- }
- // ignore all signals while we run in a signal handler
- sigfillset(&sa.sa_mask);
- int i;
- for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
- switch (signals_waiting[i].action) {
- case NETDATA_SIGNAL_IGNORE:
- sa.sa_handler = SIG_IGN;
- break;
- case NETDATA_SIGNAL_CHILD:
- if (reaper_enabled == 0)
- continue;
- // FALLTHROUGH
- default:
- sa.sa_handler = signal_handler;
- break;
- }
- if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
- error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name);
- }
- }
- void signals_restore_SIGCHLD(void)
- {
- struct sigaction sa;
- if (reaper_enabled == 0)
- return;
- sa.sa_flags = 0;
- sigfillset(&sa.sa_mask);
- sa.sa_handler = signal_handler;
- if(sigaction(SIGCHLD, &sa, NULL) == -1)
- error("SIGNAL: Failed to change signal handler for: SIGCHLD");
- }
- void signals_reset(void) {
- struct sigaction sa;
- sigemptyset(&sa.sa_mask);
- sa.sa_handler = SIG_DFL;
- sa.sa_flags = 0;
- int i;
- for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
- if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1)
- error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name);
- }
- if (reaper_enabled == 1)
- myp_free();
- }
- // reap_child reaps the child identified by pid.
- static void reap_child(pid_t pid) {
- siginfo_t i;
- errno = 0;
- debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid);
- if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) {
- if (errno != ECHILD)
- error("SIGNAL: Failed to wait for: %d", pid);
- else
- debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid);
- return;
- } else if (i.si_pid == 0) {
- // Process didn't exit, this shouldn't happen.
- return;
- }
- switch (i.si_code) {
- case CLD_EXITED:
- debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status);
- break;
- case CLD_KILLED:
- debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status);
- break;
- case CLD_DUMPED:
- debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status);
- break;
- case CLD_STOPPED:
- debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status);
- break;
- case CLD_TRAPPED:
- debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status);
- break;
- case CLD_CONTINUED:
- debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status);
- break;
- default:
- debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status);
- }
- }
- // reap_children reaps all pending children which are not managed by myp.
- static void reap_children() {
- siginfo_t i;
- while (1 == 1) {
- // Identify which process caused the signal so we can determine
- // if we need to reap a re-parented process.
- i.si_pid = 0;
- if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) {
- if (errno != ECHILD) // This shouldn't happen with WNOHANG but does.
- error("SIGNAL: Failed to wait");
- return;
- } else if (i.si_pid == 0) {
- // No child exited.
- return;
- } else if (myp_reap(i.si_pid) == 0) {
- // myp managed, sleep for a short time to avoid busy wait while
- // this is handled by myp.
- usleep(10000);
- } else {
- // Unknown process, likely a re-parented child, reap it.
- reap_child(i.si_pid);
- }
- }
- }
- void signals_handle(void) {
- while(1) {
- // pause() causes the calling process (or thread) to sleep until a signal
- // is delivered that either terminates the process or causes the invocation
- // of a signal-catching function.
- if(pause() == -1 && errno == EINTR) {
- // loop once, but keep looping while signals are coming in
- // this is needed because a few operations may take some time
- // so we need to check for new signals before pausing again
- int found = 1;
- while(found) {
- found = 0;
- // execute the actions of the signals
- int i;
- for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) {
- if (signals_waiting[i].count) {
- found = 1;
- signals_waiting[i].count = 0;
- const char *name = signals_waiting[i].name;
- switch (signals_waiting[i].action) {
- case NETDATA_SIGNAL_RELOAD_HEALTH:
- error_log_limit_unlimited();
- info("SIGNAL: Received %s. Reloading HEALTH configuration...", name);
- error_log_limit_reset();
- execute_command(CMD_RELOAD_HEALTH, NULL, NULL);
- break;
- case NETDATA_SIGNAL_SAVE_DATABASE:
- error_log_limit_unlimited();
- info("SIGNAL: Received %s. Saving databases...", name);
- error_log_limit_reset();
- execute_command(CMD_SAVE_DATABASE, NULL, NULL);
- break;
- case NETDATA_SIGNAL_REOPEN_LOGS:
- error_log_limit_unlimited();
- info("SIGNAL: Received %s. Reopening all log files...", name);
- error_log_limit_reset();
- execute_command(CMD_REOPEN_LOGS, NULL, NULL);
- break;
- case NETDATA_SIGNAL_EXIT_CLEANLY:
- error_log_limit_unlimited();
- info("SIGNAL: Received %s. Cleaning up to exit...", name);
- commands_exit();
- netdata_cleanup_and_exit(0);
- exit(0);
- break;
- case NETDATA_SIGNAL_FATAL:
- fatal("SIGNAL: Received %s. netdata now exits.", name);
- break;
- case NETDATA_SIGNAL_CHILD:
- debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name);
- reap_children();
- break;
- default:
- info("SIGNAL: Received %s. No signal handler configured. Ignoring it.", name);
- break;
- }
- }
- }
- }
- }
- else
- error("SIGNAL: pause() returned but it was not interrupted by a signal.");
- }
- }
|