claim.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "claim.h"
  3. #include "registry/registry_internals.h"
  4. #include "aclk/aclk.h"
  5. #include "aclk/aclk_proxy.h"
  6. char *claiming_pending_arguments = NULL;
  7. static char *claiming_errors[] = {
  8. "Agent claimed successfully", // 0
  9. "Unknown argument", // 1
  10. "Problems with claiming working directory", // 2
  11. "Missing dependencies", // 3
  12. "Failure to connect to endpoint", // 4
  13. "The CLI didn't work", // 5
  14. "Wrong user", // 6
  15. "Unknown HTTP error message", // 7
  16. "invalid node id", // 8
  17. "invalid node name", // 9
  18. "invalid room id", // 10
  19. "invalid public key", // 11
  20. "token expired/token not found/invalid token", // 12
  21. "already claimed", // 13
  22. "processing claiming", // 14
  23. "Internal Server Error", // 15
  24. "Gateway Timeout", // 16
  25. "Service Unavailable", // 17
  26. "Agent Unique Id Not Readable" // 18
  27. };
  28. /* Retrieve the claim id for the agent.
  29. * Caller owns the string.
  30. */
  31. char *get_agent_claimid()
  32. {
  33. char *result;
  34. rrdhost_aclk_state_lock(localhost);
  35. result = (localhost->aclk_state.claimed_id == NULL) ? NULL : strdupz(localhost->aclk_state.claimed_id);
  36. rrdhost_aclk_state_unlock(localhost);
  37. return result;
  38. }
  39. #define CLAIMING_COMMAND_LENGTH 16384
  40. #define CLAIMING_PROXY_LENGTH (CLAIMING_COMMAND_LENGTH/4)
  41. extern struct registry registry;
  42. /* rrd_init() and post_conf_load() must have been called before this function */
  43. CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, const char **msg)
  44. {
  45. if (!force || !netdata_cloud_enabled) {
  46. netdata_log_error("Refusing to claim agent -> cloud functionality has been disabled");
  47. return CLAIM_AGENT_CLOUD_DISABLED;
  48. }
  49. #ifndef DISABLE_CLOUD
  50. int exit_code;
  51. pid_t command_pid;
  52. char command_buffer[CLAIMING_COMMAND_LENGTH + 1];
  53. FILE *fp_child_output, *fp_child_input;
  54. // This is guaranteed to be set early in main via post_conf_load()
  55. char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
  56. if (cloud_base_url == NULL) {
  57. internal_fatal(true, "Do not move the cloud base url out of post_conf_load!!");
  58. return CLAIM_AGENT_NO_CLOUD_URL;
  59. }
  60. const char *proxy_str;
  61. ACLK_PROXY_TYPE proxy_type;
  62. char proxy_flag[CLAIMING_PROXY_LENGTH] = "-noproxy";
  63. proxy_str = aclk_get_proxy(&proxy_type);
  64. if (proxy_type == PROXY_TYPE_SOCKS5 || proxy_type == PROXY_TYPE_HTTP)
  65. snprintf(proxy_flag, CLAIMING_PROXY_LENGTH, "-proxy=\"%s\"", proxy_str);
  66. snprintfz(command_buffer,
  67. CLAIMING_COMMAND_LENGTH,
  68. "exec netdata-claim.sh %s -hostname=%s -id=%s -url=%s -noreload %s",
  69. proxy_flag,
  70. netdata_configured_hostname,
  71. localhost->machine_guid,
  72. cloud_base_url,
  73. claiming_arguments);
  74. netdata_log_info("Executing agent claiming command 'netdata-claim.sh'");
  75. fp_child_output = netdata_popen(command_buffer, &command_pid, &fp_child_input);
  76. if(!fp_child_output) {
  77. netdata_log_error("Cannot popen(\"%s\").", command_buffer);
  78. return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT;
  79. }
  80. netdata_log_info("Waiting for claiming command to finish.");
  81. while (fgets(command_buffer, CLAIMING_COMMAND_LENGTH, fp_child_output) != NULL) {;}
  82. exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
  83. netdata_log_info("Agent claiming command returned with code %d", exit_code);
  84. if (0 == exit_code) {
  85. load_claiming_state();
  86. return CLAIM_AGENT_OK;
  87. }
  88. if (exit_code < 0) {
  89. netdata_log_error("Agent claiming command failed to complete its run.");
  90. return CLAIM_AGENT_CLAIM_SCRIPT_FAILED;
  91. }
  92. errno = 0;
  93. unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
  94. if ((unsigned)exit_code > maximum_known_exit_code) {
  95. netdata_log_error("Agent failed to be claimed with an unknown error.");
  96. return CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE;
  97. }
  98. netdata_log_error("Agent failed to be claimed with the following error message:");
  99. netdata_log_error("\"%s\"", claiming_errors[exit_code]);
  100. if(msg) *msg = claiming_errors[exit_code];
  101. #else
  102. UNUSED(claiming_arguments);
  103. UNUSED(claiming_errors);
  104. #endif
  105. return CLAIM_AGENT_FAILED_WITH_MESSAGE;
  106. }
  107. #ifdef ENABLE_ACLK
  108. extern int aclk_connected, aclk_kill_link, aclk_disable_runtime;
  109. #endif
  110. /* Change the claimed state of the agent.
  111. *
  112. * This only happens when the user has explicitly requested it:
  113. * - via the cli tool by reloading the claiming state
  114. * - after spawning the claim because of a command-line argument
  115. * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK
  116. */
  117. void load_claiming_state(void)
  118. {
  119. // --------------------------------------------------------------------
  120. // Check if the cloud is enabled
  121. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  122. netdata_cloud_enabled = false;
  123. #else
  124. uuid_t uuid;
  125. // Propagate into aclk and registry. Be kind of atomic...
  126. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  127. rrdhost_aclk_state_lock(localhost);
  128. if (localhost->aclk_state.claimed_id) {
  129. if (aclk_connected)
  130. localhost->aclk_state.prev_claimed_id = strdupz(localhost->aclk_state.claimed_id);
  131. freez(localhost->aclk_state.claimed_id);
  132. localhost->aclk_state.claimed_id = NULL;
  133. }
  134. if (aclk_connected)
  135. {
  136. netdata_log_info("Agent was already connected to Cloud - forcing reconnection under new credentials");
  137. aclk_kill_link = 1;
  138. }
  139. aclk_disable_runtime = 0;
  140. char filename[FILENAME_MAX + 1];
  141. snprintfz(filename, FILENAME_MAX, "%s/cloud.d/claimed_id", netdata_configured_varlib_dir);
  142. long bytes_read;
  143. char *claimed_id = read_by_filename(filename, &bytes_read);
  144. if(claimed_id && uuid_parse(claimed_id, uuid)) {
  145. netdata_log_error("claimed_id \"%s\" doesn't look like valid UUID", claimed_id);
  146. freez(claimed_id);
  147. claimed_id = NULL;
  148. }
  149. if(claimed_id) {
  150. localhost->aclk_state.claimed_id = mallocz(UUID_STR_LEN);
  151. uuid_unparse_lower(uuid, localhost->aclk_state.claimed_id);
  152. }
  153. invalidate_node_instances(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  154. metaqueue_store_claim_id(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  155. rrdhost_aclk_state_unlock(localhost);
  156. if (!claimed_id) {
  157. netdata_log_info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename);
  158. return;
  159. }
  160. freez(claimed_id);
  161. netdata_log_info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename);
  162. netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
  163. #endif
  164. }
  165. struct config cloud_config = { .first_section = NULL,
  166. .last_section = NULL,
  167. .mutex = NETDATA_MUTEX_INITIALIZER,
  168. .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
  169. .rwlock = AVL_LOCK_INITIALIZER } };
  170. void load_cloud_conf(int silent)
  171. {
  172. char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
  173. if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1))
  174. netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
  175. char *filename;
  176. errno = 0;
  177. int ret = 0;
  178. filename = strdupz_path_subpath(netdata_configured_varlib_dir, "cloud.d/cloud.conf");
  179. ret = appconfig_load(&cloud_config, filename, 1, NULL);
  180. if(!ret && !silent)
  181. netdata_log_info("CONFIG: cannot load cloud config '%s'. Running with internal defaults.", filename);
  182. freez(filename);
  183. // --------------------------------------------------------------------
  184. // Check if the cloud is enabled
  185. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  186. netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
  187. #else
  188. netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
  189. #endif
  190. // This must be set before any point in the code that accesses it. Do not move it from this function.
  191. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  192. }
  193. static char *netdata_random_session_id_filename = NULL;
  194. static uuid_t netdata_random_session_id = { 0 };
  195. bool netdata_random_session_id_generate(void) {
  196. static char guid[UUID_STR_LEN] = "";
  197. uuid_generate_random(netdata_random_session_id);
  198. uuid_unparse_lower(netdata_random_session_id, guid);
  199. char filename[FILENAME_MAX + 1];
  200. snprintfz(filename, FILENAME_MAX, "%s/netdata_random_session_id", netdata_configured_varlib_dir);
  201. bool ret = true;
  202. (void)unlink(filename);
  203. // save it
  204. int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 640);
  205. if(fd == -1) {
  206. netdata_log_error("Cannot create random session id file '%s'.", filename);
  207. ret = false;
  208. }
  209. else {
  210. if (write(fd, guid, UUID_STR_LEN - 1) != UUID_STR_LEN - 1) {
  211. netdata_log_error("Cannot write the random session id file '%s'.", filename);
  212. ret = false;
  213. } else {
  214. ssize_t bytes = write(fd, "\n", 1);
  215. UNUSED(bytes);
  216. }
  217. close(fd);
  218. }
  219. if(ret && (!netdata_random_session_id_filename || strcmp(netdata_random_session_id_filename, filename) != 0)) {
  220. freez(netdata_random_session_id_filename);
  221. netdata_random_session_id_filename = strdupz(filename);
  222. }
  223. return ret;
  224. }
  225. const char *netdata_random_session_id_get_filename(void) {
  226. if(!netdata_random_session_id_filename)
  227. netdata_random_session_id_generate();
  228. return netdata_random_session_id_filename;
  229. }
  230. bool netdata_random_session_id_matches(const char *guid) {
  231. if(uuid_is_null(netdata_random_session_id))
  232. return false;
  233. uuid_t uuid;
  234. if(uuid_parse(guid, uuid))
  235. return false;
  236. if(uuid_compare(netdata_random_session_id, uuid) == 0)
  237. return true;
  238. return false;
  239. }
  240. static bool check_claim_param(const char *s) {
  241. if(!s || !*s) return true;
  242. do {
  243. if(isalnum(*s) || *s == '.' || *s == ',' || *s == '-' || *s == ':' || *s == '/' || *s == '_')
  244. ;
  245. else
  246. return false;
  247. } while(*++s);
  248. return true;
  249. }
  250. void claim_reload_all(void) {
  251. error_log_limit_unlimited();
  252. load_claiming_state();
  253. registry_update_cloud_base_url();
  254. rrdpush_send_claimed_id(localhost);
  255. error_log_limit_reset();
  256. }
  257. int api_v2_claim(struct web_client *w, char *url) {
  258. char *key = NULL;
  259. char *token = NULL;
  260. char *rooms = NULL;
  261. char *base_url = NULL;
  262. while (url) {
  263. char *value = strsep_skip_consecutive_separators(&url, "&");
  264. if (!value || !*value) continue;
  265. char *name = strsep_skip_consecutive_separators(&value, "=");
  266. if (!name || !*name) continue;
  267. if (!value || !*value) continue;
  268. if(!strcmp(name, "key"))
  269. key = value;
  270. else if(!strcmp(name, "token"))
  271. token = value;
  272. else if(!strcmp(name, "rooms"))
  273. rooms = value;
  274. else if(!strcmp(name, "url"))
  275. base_url = value;
  276. }
  277. BUFFER *wb = w->response.data;
  278. buffer_flush(wb);
  279. buffer_json_initialize(wb, "\"", "\"", 0, true, false);
  280. time_t now_s = now_realtime_sec();
  281. CLOUD_STATUS status = buffer_json_cloud_status(wb, now_s);
  282. bool can_be_claimed = false;
  283. switch(status) {
  284. case CLOUD_STATUS_AVAILABLE:
  285. case CLOUD_STATUS_DISABLED:
  286. case CLOUD_STATUS_OFFLINE:
  287. can_be_claimed = true;
  288. break;
  289. case CLOUD_STATUS_UNAVAILABLE:
  290. case CLOUD_STATUS_BANNED:
  291. case CLOUD_STATUS_ONLINE:
  292. can_be_claimed = false;
  293. break;
  294. }
  295. buffer_json_member_add_boolean(wb, "can_be_claimed", can_be_claimed);
  296. if(can_be_claimed && key) {
  297. if(!netdata_random_session_id_matches(key)) {
  298. buffer_reset(wb);
  299. buffer_strcat(wb, "invalid key");
  300. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  301. return HTTP_RESP_FORBIDDEN;
  302. }
  303. if(!token || !base_url || !check_claim_param(token) || !check_claim_param(base_url) || (rooms && !check_claim_param(rooms))) {
  304. buffer_reset(wb);
  305. buffer_strcat(wb, "invalid parameters");
  306. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  307. return HTTP_RESP_BAD_REQUEST;
  308. }
  309. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  310. netdata_cloud_enabled = CONFIG_BOOLEAN_AUTO;
  311. appconfig_set_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", CONFIG_BOOLEAN_AUTO);
  312. appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", base_url);
  313. uuid_t claimed_id;
  314. uuid_generate_random(claimed_id);
  315. char claimed_id_str[UUID_STR_LEN];
  316. uuid_unparse_lower(claimed_id, claimed_id_str);
  317. BUFFER *t = buffer_create(1024, NULL);
  318. if(rooms)
  319. buffer_sprintf(t, "-id=%s -token=%s -rooms=%s", claimed_id_str, token, rooms);
  320. else
  321. buffer_sprintf(t, "-id=%s -token=%s", claimed_id_str, token);
  322. bool success = false;
  323. const char *msg = NULL;
  324. CLAIM_AGENT_RESPONSE rc = claim_agent(buffer_tostring(t), true, &msg);
  325. switch(rc) {
  326. case CLAIM_AGENT_OK:
  327. msg = "ok";
  328. success = true;
  329. can_be_claimed = false;
  330. claim_reload_all();
  331. {
  332. int ms = 0;
  333. do {
  334. status = cloud_status();
  335. if (status == CLOUD_STATUS_ONLINE)
  336. break;
  337. sleep_usec(100 * USEC_PER_MS);
  338. ms += 100;
  339. } while (ms < 5000);
  340. }
  341. break;
  342. case CLAIM_AGENT_NO_CLOUD_URL:
  343. msg = "No Netdata Cloud URL.";
  344. break;
  345. case CLAIM_AGENT_CLAIM_SCRIPT_FAILED:
  346. msg = "Claiming script failed.";
  347. break;
  348. case CLAIM_AGENT_CLOUD_DISABLED:
  349. msg = "Netdata Cloud is disabled on this agent.";
  350. break;
  351. case CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT:
  352. msg = "Failed to execute claiming script.";
  353. break;
  354. case CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE:
  355. msg = "Claiming script returned invalid code.";
  356. break;
  357. default:
  358. case CLAIM_AGENT_FAILED_WITH_MESSAGE:
  359. if(!msg)
  360. msg = "Unknown error";
  361. break;
  362. }
  363. // our status may have changed
  364. // refresh the status in our output
  365. buffer_flush(wb);
  366. buffer_json_initialize(wb, "\"", "\"", 0, true, false);
  367. now_s = now_realtime_sec();
  368. buffer_json_cloud_status(wb, now_s);
  369. // and this is the status of the claiming command we run
  370. buffer_json_member_add_boolean(wb, "success", success);
  371. buffer_json_member_add_string(wb, "message", msg);
  372. }
  373. if(can_be_claimed)
  374. buffer_json_member_add_string(wb, "key_filename", netdata_random_session_id_get_filename());
  375. buffer_json_agents_v2(wb, NULL, now_s, false, false);
  376. buffer_json_finalize(wb);
  377. return HTTP_RESP_OK;
  378. }