Browse Source

agent alert notifications redirect (#15350)

* agent alert notifications redirect

* set the same cookies with SameSite: Strict

* registry search now requires only "for" parameter

* registry responses are not cacheable

* fix typo and add more error checking

* account for registry memory when mmap is used

* fix free with aral
Costa Tsaousis 1 year ago
parent
commit
b61ddad5e6

+ 11 - 3
health/health.c

@@ -81,8 +81,9 @@ static bool prepare_command(BUFFER *wb,
                             const char *crit_alarms,
                             const char *classification,
                             const char *edit_command,
-                            const char *machine_guid)
-{
+                            const char *machine_guid,
+                            uuid_t *transition_id
+) {
     char buf[8192];
     size_t n = 8192 - 1;
 
@@ -188,6 +189,12 @@ static bool prepare_command(BUFFER *wb,
         return false;
     buffer_sprintf(wb, " '%s'", buf);
 
+    char tr_id[UUID_STR_LEN];
+    uuid_unparse_lower(*transition_id, tr_id);
+    if (!sanitize_command_argument_string(buf, tr_id, n))
+        return false;
+    buffer_sprintf(wb, " '%s'", buf);
+
     return true;
 }
 
@@ -575,7 +582,8 @@ static inline void health_alarm_execute(RRDHOST *host, ALARM_ENTRY *ae) {
                               buffer_tostring(crit_alarms),
                               ae->classification?ae_classification(ae):"Unknown",
                               edit_command,
-                              host != localhost ? host->machine_guid:"");
+                              host->machine_guid,
+                              &ae->transition_id);
 
     const char *command_to_run = buffer_tostring(wb);
     if (ok) {

+ 9 - 22
health/notifications/alarm-notify.sh.in

@@ -3,7 +3,7 @@
 
 # netdata
 # real-time performance and health monitoring, done right!
-# (C) 2017 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2023 Netdata Inc.
 # SPDX-License-Identifier: GPL-3.0-or-later
 #
 # Script to send alarm notifications for netdata
@@ -246,7 +246,8 @@ else
   total_crit_alarms="${26}"  # List of alarms in critical state
   classification="${27}"     # The class field from .conf files
   edit_command_line="${28}"  # The command to edit the alarm, with the line number
-  child_machine_guid="${29}" # If populated, the notification is sent for a child
+  child_machine_guid="${29}" # the machine_guid of the child
+  transition_id="${30}"      # the transition_id of the alert
 fi
 
 # -----------------------------------------------------------------------------
@@ -2488,31 +2489,17 @@ urlencode "${value_string}" >/dev/null
 url_value_string="${REPLY}"
 
 redirect_params="host=${url_host}&chart=${url_chart}&family=${url_family}&alarm=${url_name}&alarm_unique_id=${unique_id}&alarm_id=${alarm_id}&alarm_event_id=${event_id}&alarm_when=${when}&alarm_status=${status}&alarm_chart=${chart}&alarm_value=${url_value_string}"
-GOTOCLOUD=0
 
-if [ "${NETDATA_REGISTRY_URL}" == "https://registry.my-netdata.io" ]; then
-  if [ -z "${NETDATA_REGISTRY_UNIQUE_ID}" ]; then
-    if [ -f "@registrydir_POST@/netdata.public.unique.id" ]; then
-      NETDATA_REGISTRY_UNIQUE_ID="$(cat "@registrydir_POST@/netdata.public.unique.id")"
-    fi
-  fi
-  if [ -n "${NETDATA_REGISTRY_UNIQUE_ID}" ]; then
-    GOTOCLOUD=1
-  fi
-fi
-
-if [ ${GOTOCLOUD} -eq 0 ]; then
-  goto_url="${NETDATA_REGISTRY_URL}/goto-host-from-alarm.html?${redirect_params}"
-else
-    # Temporarily disable alarm redirection, as the cloud endpoint no longer exists. This functionality will be restored after discussion on #9487. For now, just lead to netdata.cloud
-    # Re-allow alarm redirection, for alarms 2.0, new template
-  if [ -z "${child_machine_guid}" ]; then
-      goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&${redirect_params}"
+if [ -z "${NETDATA_REGISTRY_UNIQUE_ID}" ]; then
+  if [ -f "@registrydir_POST@/netdata.public.unique.id" ]; then
+    NETDATA_REGISTRY_UNIQUE_ID="$(cat "@registrydir_POST@/netdata.public.unique.id")"
   else
-      goto_url="${NETDATA_REGISTRY_CLOUD_BASE_URL}/alarms/redirect?agentId=${NETDATA_REGISTRY_UNIQUE_ID}&childId=${child_machine_guid}&${redirect_params}"
+    error "failed to identify this agent via its NETDATA_REGISTRY_UNIQUE_ID."
   fi
 fi
 
+goto_url="${NETDATA_REGISTRY_URL}/registry-alert-redirect.html?agent_machine_guid=${NETDATA_REGISTRY_UNIQUE_ID}&host_machine_guid=${child_machine_guid}&transition_id=${transition_id}&${redirect_params}"
+
 # the severity of the alarm
 severity="${status}"
 

+ 33 - 15
registry/registry.c

@@ -33,11 +33,13 @@ static void registry_set_cookie(struct web_client *w, const char *guid) {
     strftime(e_date, sizeof(e_date), "%a, %d %b %Y %H:%M:%S %Z", etm);
 
     buffer_sprintf(w->response.header, "Set-Cookie: " NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s\r\n", guid, e_date);
+    buffer_sprintf(w->response.header, "Set-Cookie: " NETDATA_REGISTRY_COOKIE_NAME "=%s; SameSite=Strict; Expires=%s\r\n", guid, e_date);
     if(registry.enable_cookies_samesite_secure)
         buffer_sprintf(w->response.header, "Set-Cookie: " NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s; SameSite=None; Secure\r\n", guid, e_date);
 
     if(registry.registry_domain && *registry.registry_domain) {
         buffer_sprintf(w->response.header, "Set-Cookie: " NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s; Domain=%s\r\n", guid, e_date, registry.registry_domain);
+        buffer_sprintf(w->response.header, "Set-Cookie: " NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s; Domain=%s; SameSite=Strict\r\n", guid, e_date, registry.registry_domain);
         if(registry.enable_cookies_samesite_secure)
             buffer_sprintf(w->response.header, "Set-Cookie: " NETDATA_REGISTRY_COOKIE_NAME "=%s; Expires=%s; Domain=%s; SameSite=None; Secure\r\n", guid, e_date, registry.registry_domain);
     }
@@ -166,16 +168,26 @@ int registry_request_hello_json(RRDHOST *host, struct web_client *w) {
     if(host->node_id)
         buffer_json_member_add_uuid(w->response.data, "node_id", host->node_id);
 
-    char *claim_id = get_agent_claimid();
-    if(claim_id) {
-        buffer_json_member_add_string(w->response.data, "claim_id", claim_id);
-        freez(claim_id);
+    buffer_json_member_add_object(w->response.data, "agent");
+    {
+        buffer_json_member_add_string(w->response.data, "machine_guid", localhost->machine_guid);
+
+        if(localhost->node_id)
+            buffer_json_member_add_uuid(w->response.data, "node_id", localhost->node_id);
+
+        char *claim_id = get_agent_claimid();
+        if (claim_id) {
+            buffer_json_member_add_string(w->response.data, "claim_id", claim_id);
+            freez(claim_id);
+        }
+
+        buffer_json_member_add_boolean(w->response.data, "bearer_protection", netdata_is_protected_by_bearer);
     }
+    buffer_json_object_close(w->response.data);
 
     buffer_json_member_add_string(w->response.data, "registry", registry.registry_to_announce);
     buffer_json_member_add_string(w->response.data, "cloud_base_url", registry.cloud_base_url);
     buffer_json_member_add_boolean(w->response.data, "anonymous_statistics", netdata_anonymous_statistics_enabled);
-    buffer_json_member_add_boolean(w->response.data, "bearer_protection", netdata_is_protected_by_bearer);
 
     buffer_json_member_add_array(w->response.data, "nodes");
     RRDHOST *h;
@@ -296,19 +308,19 @@ int registry_request_delete_json(RRDHOST *host, struct web_client *w, char *pers
 // public SEARCH request
 
 // the main method for searching the URLs of a netdata
-int registry_request_search_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when) {
+int registry_request_search_json(RRDHOST *host, struct web_client *w, char *person_guid, char *request_machine) {
     if(!registry.enabled)
         return registry_json_disabled(host, w, "search");
 
-    if(!registry_is_valid_url(url)) {
-        buffer_flush(w->response.data);
-        buffer_strcat(w->response.data, "Invalid URL given in the request");
-        return HTTP_RESP_BAD_REQUEST;
+    if(!person_guid || !person_guid[0]) {
+        registry_json_header(host, w, "search", REGISTRY_STATUS_FAILED);
+        registry_json_footer(w);
+        return HTTP_RESP_PRECOND_FAIL;
     }
 
     registry_lock();
 
-    REGISTRY_MACHINE *m = registry_request_machine(person_guid, machine_guid, url, request_machine, when);
+    REGISTRY_MACHINE *m = registry_request_machine(person_guid, request_machine);
     if(!m) {
         registry_json_header(host, w, "search", REGISTRY_STATUS_FAILED);
         registry_json_footer(w);
@@ -339,6 +351,12 @@ int registry_request_switch_json(RRDHOST *host, struct web_client *w, char *pers
     if(!registry.enabled)
         return registry_json_disabled(host, w, "switch");
 
+    if(!person_guid || !person_guid[0]) {
+        buffer_flush(w->response.data);
+        buffer_strcat(w->response.data, "Who are you? Person GUID is missing");
+        return HTTP_RESP_PRECOND_FAIL;
+    }
+
     if(!registry_is_valid_url(url)) {
         buffer_flush(w->response.data);
         buffer_strcat(w->response.data, "Invalid URL given in the request");
@@ -494,16 +512,16 @@ void registry_statistics(void) {
     }
 
     struct aral_statistics *p_aral_stats = aral_statistics(registry.persons_aral);
-    rrddim_set(stm, "persons",       (collected_number)p_aral_stats->structures.allocated_bytes + (collected_number)p_aral_stats->malloc.allocated_bytes);
+    rrddim_set(stm, "persons",       (collected_number)p_aral_stats->structures.allocated_bytes + (collected_number)p_aral_stats->malloc.allocated_bytes + (collected_number)p_aral_stats->mmap.allocated_bytes);
 
     struct aral_statistics *m_aral_stats = aral_statistics(registry.machines_aral);
-    rrddim_set(stm, "machines",      (collected_number)m_aral_stats->structures.allocated_bytes + (collected_number)m_aral_stats->malloc.allocated_bytes);
+    rrddim_set(stm, "machines",      (collected_number)m_aral_stats->structures.allocated_bytes + (collected_number)m_aral_stats->malloc.allocated_bytes + (collected_number)m_aral_stats->mmap.allocated_bytes);
 
     struct aral_statistics *pu_aral_stats = aral_statistics(registry.person_urls_aral);
-    rrddim_set(stm, "persons_urls",  (collected_number)pu_aral_stats->structures.allocated_bytes + (collected_number)pu_aral_stats->malloc.allocated_bytes);
+    rrddim_set(stm, "persons_urls",  (collected_number)pu_aral_stats->structures.allocated_bytes + (collected_number)pu_aral_stats->malloc.allocated_bytes + (collected_number)pu_aral_stats->mmap.allocated_bytes);
 
     struct aral_statistics *mu_aral_stats = aral_statistics(registry.machine_urls_aral);
-    rrddim_set(stm, "machines_urls", (collected_number)mu_aral_stats->structures.allocated_bytes + (collected_number)mu_aral_stats->malloc.allocated_bytes);
+    rrddim_set(stm, "machines_urls", (collected_number)mu_aral_stats->structures.allocated_bytes + (collected_number)mu_aral_stats->malloc.allocated_bytes + (collected_number)mu_aral_stats->mmap.allocated_bytes);
 
     rrdset_done(stm);
 }

+ 1 - 1
registry/registry.h

@@ -64,7 +64,7 @@ void registry_free(void);
 // HTTP requests handled by the registry
 int registry_request_access_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *name, time_t when);
 int registry_request_delete_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *delete_url, time_t when);
-int registry_request_search_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when);
+int registry_request_search_json(RRDHOST *host, struct web_client *w, char *person_guid, char *request_machine);
 int registry_request_switch_json(RRDHOST *host, struct web_client *w, char *person_guid, char *machine_guid, char *url, char *new_person_guid, time_t when);
 int registry_request_hello_json(RRDHOST *host, struct web_client *w);
 

+ 3 - 2
registry/registry_init.c

@@ -202,7 +202,7 @@ static int machine_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v
         count++;
     }
 
-    freez(m);
+    aral_freez(registry.machines_aral, m);
 
     return count + 1;
 }
@@ -219,13 +219,14 @@ static int registry_person_del_callback(const DICTIONARY_ITEM *item __maybe_unus
     //dictionary_del(registry.persons, p->guid);
 
     netdata_log_debug(D_REGISTRY, "Registry: freeing person '%s'", p->guid);
-    freez(p);
+    aral_freez(registry.persons_aral, p);
 
     return 1;
 }
 
 void registry_free(void) {
     if(!registry.enabled) return;
+    registry.enabled = false;
 
     netdata_log_debug(D_REGISTRY, "Registry: destroying persons dictionary");
     dictionary_walkthrough_read(registry.persons, registry_person_del_callback, NULL);

+ 19 - 44
registry/registry_internals.c

@@ -209,63 +209,38 @@ REGISTRY_PERSON *registry_request_delete(const char *person_guid, char *machine_
 }
 
 
-// a structure to pass to the dictionary_walkthrough_read() callback handler
-struct machine_request_callback_data {
-    REGISTRY_MACHINE *find_this_machine;
-    REGISTRY_PERSON_URL *result;
-};
-
-// the callback function
-// this will be run for every PERSON_URL of this PERSON
-static int machine_request_callback(void *entry, void *data) {
-    REGISTRY_PERSON_URL *mypu = (REGISTRY_PERSON_URL *)entry;
-    struct machine_request_callback_data *myrdata = (struct machine_request_callback_data *)data;
-
-    if(mypu->machine == myrdata->find_this_machine) {
-        myrdata->result = mypu;
-        return -1; // this will also stop the walk through
-    }
-
-    return 0; // continue
-}
-
-REGISTRY_MACHINE *registry_request_machine(const char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when) {
-    (void)when;
-
+REGISTRY_MACHINE *registry_request_machine(const char *person_guid, char *request_machine) {
+    char pbuf[GUID_LEN + 1];
     char mbuf[GUID_LEN + 1];
 
-    REGISTRY_PERSON *p = NULL;
-    REGISTRY_MACHINE *m = NULL;
-    REGISTRY_PERSON_URL *pu = registry_verify_request(person_guid, machine_guid, url, &p, &m);
-    if(!pu || !p || !m) return NULL;
+    // make sure the person GUID is valid
+    if(regenerate_guid(person_guid, pbuf) == -1) {
+        netdata_log_info("REGISTRY: %s(): invalid person GUID '%s'", __FUNCTION__ , person_guid);
+        return NULL;
+    }
+    person_guid = pbuf;
 
-    // make sure the machine GUID is valid
+    // make sure the requested machine GUID is valid
     if(regenerate_guid(request_machine, mbuf) == -1) {
-        netdata_log_info("Registry Machine URLs request: invalid machine GUID, person: '%s', machine '%s', url '%s', request machine '%s'", p->guid, m->guid, string2str(pu->url), request_machine);
+        netdata_log_info("REGISTRY: %s(): invalid search machine GUID '%s'", __FUNCTION__ , request_machine);
         return NULL;
     }
     request_machine = mbuf;
 
-    // make sure the machine exists
-    m = registry_machine_find(request_machine);
-    if(!m) {
-        netdata_log_info("Registry Machine URLs request: machine not found, person: '%s', machine '%s', url '%s', request machine '%s'", p->guid, machine_guid, string2str(pu->url), request_machine);
-        return NULL;
-    }
+    REGISTRY_PERSON *p = registry_person_find(person_guid);
+    if(!p) return NULL;
+
+    REGISTRY_MACHINE *m = registry_machine_find(request_machine);
+    if(!m) return NULL;
 
     // Verify the user has in the past accessed this machine
     // We will walk through the PERSON_URLs to find the machine
     // linking to our machine
 
-    // a structure to pass to the dictionary_walkthrough_read() callback handler
-    struct machine_request_callback_data rdata = { m, NULL };
-
-    // request a walk through on the dictionary
-    for(pu = p->person_urls; pu ;pu = pu->next)
-        machine_request_callback(pu, &rdata);
-
-    if(rdata.result)
-        return m;
+    // make sure the user has access
+    for(REGISTRY_PERSON_URL *pu = p->person_urls; pu ;pu = pu->next)
+        if(pu->machine == m)
+            return m;
 
     return NULL;
 }

+ 1 - 1
registry/registry_internals.h

@@ -72,7 +72,7 @@ extern struct registry registry;
 // REGISTRY LOW-LEVEL REQUESTS (in registry-internals.c)
 REGISTRY_PERSON *registry_request_access(const char *person_guid, char *machine_guid, char *url, char *name, time_t when);
 REGISTRY_PERSON *registry_request_delete(const char *person_guid, char *machine_guid, char *url, char *delete_url, time_t when);
-REGISTRY_MACHINE *registry_request_machine(const char *person_guid, char *machine_guid, char *url, char *request_machine, time_t when);
+REGISTRY_MACHINE *registry_request_machine(const char *person_guid, char *request_machine);
 
 // REGISTRY LOG (in registry_log.c)
 void registry_log(char action, REGISTRY_PERSON *p, REGISTRY_MACHINE *m, STRING *u, const char *name);

+ 1 - 0
registry/registry_machine.c

@@ -55,6 +55,7 @@ REGISTRY_MACHINE *registry_machine_allocate(const char *machine_guid, time_t whe
 
     m->first_t = m->last_t = (uint32_t)when;
     m->usages = 0;
+    m->links = 0;
 
     registry.machines_count++;
 

+ 6 - 4
web/api/web_api_v1.c

@@ -900,7 +900,7 @@ cleanup:
 // /api/v1/registry?action=delete&machine=${machine_guid}&name=${hostname}&url=${url}&delete_url=${delete_url}
 //
 // Search for the URLs of a machine:
-// /api/v1/registry?action=search&machine=${machine_guid}&name=${hostname}&url=${url}&for=${machine_guid}
+// /api/v1/registry?action=search&for=${machine_guid}
 //
 // Impersonate:
 // /api/v1/registry?action=switch&machine=${machine_guid}&name=${hostname}&url=${url}&to=${new_person_guid}
@@ -1026,6 +1026,8 @@ inline int web_client_api_request_v1_registry(RRDHOST *host, struct web_client *
             return web_client_permission_denied(w);
     }
 
+    buffer_no_cacheable(w->response.data);
+
     switch(action) {
         case 'A':
             if(unlikely(!machine_guid || !machine_url || !url_name)) {
@@ -1050,15 +1052,15 @@ inline int web_client_api_request_v1_registry(RRDHOST *host, struct web_client *
             return registry_request_delete_json(host, w, person_guid, machine_guid, machine_url, delete_url, now_realtime_sec());
 
         case 'S':
-            if(unlikely(!machine_guid || !machine_url || !search_machine_guid)) {
-                netdata_log_error("Invalid registry request - search requires these parameters: machine ('%s'), url ('%s'), for ('%s')", machine_guid?machine_guid:"UNSET", machine_url?machine_url:"UNSET", search_machine_guid?search_machine_guid:"UNSET");
+            if(unlikely(!search_machine_guid)) {
+                netdata_log_error("Invalid registry request - search requires these parameters: for ('%s')", search_machine_guid?search_machine_guid:"UNSET");
                 buffer_flush(w->response.data);
                 buffer_strcat(w->response.data, "Invalid registry Search request.");
                 return HTTP_RESP_BAD_REQUEST;
             }
 
             web_client_enable_tracking_required(w);
-            return registry_request_search_json(host, w, person_guid, machine_guid, machine_url, search_machine_guid, now_realtime_sec());
+            return registry_request_search_json(host, w, person_guid, search_machine_guid);
 
         case 'W':
             if(unlikely(!machine_guid || !machine_url || !to_person_guid)) {