claim.c 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "claim.h"
  3. #include "registry/registry_internals.h"
  4. #include "aclk/aclk.h"
  5. #include "aclk/aclk_proxy.h"
// Claiming arguments waiting to be processed; starts out as NULL.
// NOTE(review): nothing in the code visible here reads or writes this
// variable -- presumably it is filled in by option parsing and later handed
// to claim_agent(); confirm against the callers.
char *claiming_pending_arguments = NULL;
  7. static char *claiming_errors[] = {
  8. "Agent claimed successfully", // 0
  9. "Unknown argument", // 1
  10. "Problems with claiming working directory", // 2
  11. "Missing dependencies", // 3
  12. "Failure to connect to endpoint", // 4
  13. "The CLI didn't work", // 5
  14. "Wrong user", // 6
  15. "Unknown HTTP error message", // 7
  16. "invalid node id", // 8
  17. "invalid node name", // 9
  18. "invalid room id", // 10
  19. "invalid public key", // 11
  20. "token expired/token not found/invalid token", // 12
  21. "already claimed", // 13
  22. "processing claiming", // 14
  23. "Internal Server Error", // 15
  24. "Gateway Timeout", // 16
  25. "Service Unavailable", // 17
  26. "Agent Unique Id Not Readable" // 18
  27. };
  28. /* Retrieve the claim id for the agent.
  29. * Caller owns the string.
  30. */
  31. char *get_agent_claimid()
  32. {
  33. char *result;
  34. rrdhost_aclk_state_lock(localhost);
  35. result = (localhost->aclk_state.claimed_id == NULL) ? NULL : strdupz(localhost->aclk_state.claimed_id);
  36. rrdhost_aclk_state_unlock(localhost);
  37. return result;
  38. }
  39. #define CLAIMING_COMMAND_LENGTH 16384
  40. #define CLAIMING_PROXY_LENGTH CLAIMING_COMMAND_LENGTH/4
  41. extern struct registry registry;
  42. /* rrd_init() and post_conf_load() must have been called before this function */
  43. void claim_agent(char *claiming_arguments)
  44. {
  45. if (!netdata_cloud_setting) {
  46. error("Refusing to claim agent -> cloud functionality has been disabled");
  47. return;
  48. }
  49. #ifndef DISABLE_CLOUD
  50. int exit_code;
  51. pid_t command_pid;
  52. char command_buffer[CLAIMING_COMMAND_LENGTH + 1];
  53. FILE *fp_child_output, *fp_child_input;
  54. // This is guaranteed to be set early in main via post_conf_load()
  55. char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
  56. if (cloud_base_url == NULL)
  57. fatal("Do not move the cloud base url out of post_conf_load!!");
  58. const char *proxy_str;
  59. ACLK_PROXY_TYPE proxy_type;
  60. char proxy_flag[CLAIMING_PROXY_LENGTH] = "-noproxy";
  61. proxy_str = aclk_get_proxy(&proxy_type);
  62. if (proxy_type == PROXY_TYPE_SOCKS5 || proxy_type == PROXY_TYPE_HTTP)
  63. snprintf(proxy_flag, CLAIMING_PROXY_LENGTH, "-proxy=\"%s\"", proxy_str);
  64. snprintfz(command_buffer,
  65. CLAIMING_COMMAND_LENGTH,
  66. "exec netdata-claim.sh %s -hostname=%s -id=%s -url=%s -noreload %s",
  67. proxy_flag,
  68. netdata_configured_hostname,
  69. localhost->machine_guid,
  70. cloud_base_url,
  71. claiming_arguments);
  72. info("Executing agent claiming command 'netdata-claim.sh'");
  73. fp_child_output = netdata_popen(command_buffer, &command_pid, &fp_child_input);
  74. if(!fp_child_output) {
  75. error("Cannot popen(\"%s\").", command_buffer);
  76. return;
  77. }
  78. info("Waiting for claiming command to finish.");
  79. while (fgets(command_buffer, CLAIMING_COMMAND_LENGTH, fp_child_output) != NULL) {;}
  80. exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
  81. info("Agent claiming command returned with code %d", exit_code);
  82. if (0 == exit_code) {
  83. load_claiming_state();
  84. return;
  85. }
  86. if (exit_code < 0) {
  87. error("Agent claiming command failed to complete its run.");
  88. return;
  89. }
  90. errno = 0;
  91. unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
  92. if ((unsigned)exit_code > maximum_known_exit_code) {
  93. error("Agent failed to be claimed with an unknown error.");
  94. return;
  95. }
  96. error("Agent failed to be claimed with the following error message:");
  97. error("\"%s\"", claiming_errors[exit_code]);
  98. #else
  99. UNUSED(claiming_arguments);
  100. UNUSED(claiming_errors);
  101. #endif
  102. }
// ACLK state flags, declared extern here and only when ENABLE_ACLK is defined.
// load_claiming_state() reads aclk_connected, sets aclk_kill_link to 1 when
// already connected, and resets aclk_disable_runtime to 0.
#ifdef ENABLE_ACLK
extern int aclk_connected, aclk_kill_link, aclk_disable_runtime;
#endif
  106. /* Change the claimed state of the agent.
  107. *
  108. * This only happens when the user has explicitly requested it:
  109. * - via the cli tool by reloading the claiming state
  110. * - after spawning the claim because of a command-line argument
  111. * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK
  112. */
  113. void load_claiming_state(void)
  114. {
  115. // --------------------------------------------------------------------
  116. // Check if the cloud is enabled
  117. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  118. netdata_cloud_setting = 0;
  119. #else
  120. uuid_t uuid;
  121. // Propagate into aclk and registry. Be kind of atomic...
  122. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  123. rrdhost_aclk_state_lock(localhost);
  124. if (localhost->aclk_state.claimed_id) {
  125. if (aclk_connected)
  126. localhost->aclk_state.prev_claimed_id = strdupz(localhost->aclk_state.claimed_id);
  127. freez(localhost->aclk_state.claimed_id);
  128. localhost->aclk_state.claimed_id = NULL;
  129. }
  130. if (aclk_connected)
  131. {
  132. info("Agent was already connected to Cloud - forcing reconnection under new credentials");
  133. aclk_kill_link = 1;
  134. }
  135. aclk_disable_runtime = 0;
  136. char filename[FILENAME_MAX + 1];
  137. snprintfz(filename, FILENAME_MAX, "%s/cloud.d/claimed_id", netdata_configured_varlib_dir);
  138. long bytes_read;
  139. char *claimed_id = read_by_filename(filename, &bytes_read);
  140. if(claimed_id && uuid_parse(claimed_id, uuid)) {
  141. error("claimed_id \"%s\" doesn't look like valid UUID", claimed_id);
  142. freez(claimed_id);
  143. claimed_id = NULL;
  144. }
  145. if(claimed_id) {
  146. localhost->aclk_state.claimed_id = mallocz(UUID_STR_LEN);
  147. uuid_unparse_lower(uuid, localhost->aclk_state.claimed_id);
  148. }
  149. invalidate_node_instances(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  150. metaqueue_store_claim_id(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  151. rrdhost_aclk_state_unlock(localhost);
  152. if (!claimed_id) {
  153. info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename);
  154. return;
  155. }
  156. freez(claimed_id);
  157. info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename);
  158. netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1);
  159. #endif
  160. }
  161. struct config cloud_config = { .first_section = NULL,
  162. .last_section = NULL,
  163. .mutex = NETDATA_MUTEX_INITIALIZER,
  164. .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
  165. .rwlock = AVL_LOCK_INITIALIZER } };
  166. void load_cloud_conf(int silent)
  167. {
  168. char *filename;
  169. errno = 0;
  170. int ret = 0;
  171. filename = strdupz_path_subpath(netdata_configured_varlib_dir, "cloud.d/cloud.conf");
  172. ret = appconfig_load(&cloud_config, filename, 1, NULL);
  173. if(!ret && !silent) {
  174. info("CONFIG: cannot load cloud config '%s'. Running with internal defaults.", filename);
  175. }
  176. freez(filename);
  177. }