claim.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "claim.h"
  3. #include "registry/registry_internals.h"
  4. #include "aclk/aclk.h"
  5. #include "aclk/aclk_proxy.h"
  6. char *claiming_pending_arguments = NULL;
  7. static char *claiming_errors[] = {
  8. "Agent claimed successfully", // 0
  9. "Unknown argument", // 1
  10. "Problems with claiming working directory", // 2
  11. "Missing dependencies", // 3
  12. "Failure to connect to endpoint", // 4
  13. "The CLI didn't work", // 5
  14. "Wrong user", // 6
  15. "Unknown HTTP error message", // 7
  16. "invalid node id", // 8
  17. "invalid node name", // 9
  18. "invalid room id", // 10
  19. "invalid public key", // 11
  20. "token expired/token not found/invalid token", // 12
  21. "already claimed", // 13
  22. "processing claiming", // 14
  23. "Internal Server Error", // 15
  24. "Gateway Timeout", // 16
  25. "Service Unavailable", // 17
  26. "Agent Unique Id Not Readable" // 18
  27. };
  28. /* Retrieve the claim id for the agent.
  29. * Caller owns the string.
  30. */
  31. char *get_agent_claimid()
  32. {
  33. char *result;
  34. rrdhost_aclk_state_lock(localhost);
  35. result = (localhost->aclk_state.claimed_id == NULL) ? NULL : strdupz(localhost->aclk_state.claimed_id);
  36. rrdhost_aclk_state_unlock(localhost);
  37. return result;
  38. }
  39. #define CLAIMING_COMMAND_LENGTH 16384
  40. #define CLAIMING_PROXY_LENGTH (CLAIMING_COMMAND_LENGTH/4)
  41. extern struct registry registry;
  42. /* rrd_init() and post_conf_load() must have been called before this function */
  43. CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, const char **msg __maybe_unused)
  44. {
  45. if (!force || !netdata_cloud_enabled) {
  46. netdata_log_error("Refusing to claim agent -> cloud functionality has been disabled");
  47. return CLAIM_AGENT_CLOUD_DISABLED;
  48. }
  49. #ifndef DISABLE_CLOUD
  50. int exit_code;
  51. pid_t command_pid;
  52. char command_exec_buffer[CLAIMING_COMMAND_LENGTH + 1];
  53. char command_line_buffer[CLAIMING_COMMAND_LENGTH + 1];
  54. FILE *fp_child_output, *fp_child_input;
  55. // This is guaranteed to be set early in main via post_conf_load()
  56. char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
  57. if (cloud_base_url == NULL) {
  58. internal_fatal(true, "Do not move the cloud base url out of post_conf_load!!");
  59. return CLAIM_AGENT_NO_CLOUD_URL;
  60. }
  61. const char *proxy_str;
  62. ACLK_PROXY_TYPE proxy_type;
  63. char proxy_flag[CLAIMING_PROXY_LENGTH] = "-noproxy";
  64. proxy_str = aclk_get_proxy(&proxy_type);
  65. if (proxy_type == PROXY_TYPE_SOCKS5 || proxy_type == PROXY_TYPE_HTTP)
  66. snprintf(proxy_flag, CLAIMING_PROXY_LENGTH, "-proxy=\"%s\"", proxy_str);
  67. snprintfz(command_exec_buffer, CLAIMING_COMMAND_LENGTH,
  68. "exec \"%s%snetdata-claim.sh\"",
  69. netdata_exe_path[0] ? netdata_exe_path : "",
  70. netdata_exe_path[0] ? "/" : ""
  71. );
  72. snprintfz(command_line_buffer,
  73. CLAIMING_COMMAND_LENGTH,
  74. "%s %s -hostname=%s -id=%s -url=%s -noreload %s",
  75. command_exec_buffer,
  76. proxy_flag,
  77. netdata_configured_hostname,
  78. localhost->machine_guid,
  79. cloud_base_url,
  80. claiming_arguments);
  81. netdata_log_info("Executing agent claiming command: %s", command_exec_buffer);
  82. fp_child_output = netdata_popen(command_line_buffer, &command_pid, &fp_child_input);
  83. if(!fp_child_output) {
  84. netdata_log_error("Cannot popen(\"%s\").", command_exec_buffer);
  85. return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT;
  86. }
  87. netdata_log_info("Waiting for claiming command '%s' to finish.", command_exec_buffer);
  88. char read_buffer[100 + 1];
  89. while (fgets(read_buffer, 100, fp_child_output) != NULL) {;}
  90. exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
  91. netdata_log_info("Agent claiming command '%s' returned with code %d", command_exec_buffer, exit_code);
  92. if (0 == exit_code) {
  93. load_claiming_state();
  94. return CLAIM_AGENT_OK;
  95. }
  96. if (exit_code < 0) {
  97. netdata_log_error("Agent claiming command '%s' failed to complete its run", command_exec_buffer);
  98. return CLAIM_AGENT_CLAIM_SCRIPT_FAILED;
  99. }
  100. errno = 0;
  101. unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
  102. if ((unsigned)exit_code > maximum_known_exit_code) {
  103. netdata_log_error("Agent failed to be claimed with an unknown error. Cmd: '%s'", command_exec_buffer);
  104. return CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE;
  105. }
  106. netdata_log_error("Agent failed to be claimed using the command '%s' with the following error message:",
  107. command_exec_buffer);
  108. netdata_log_error("\"%s\"", claiming_errors[exit_code]);
  109. if(msg) *msg = claiming_errors[exit_code];
  110. #else
  111. UNUSED(claiming_arguments);
  112. UNUSED(claiming_errors);
  113. #endif
  114. return CLAIM_AGENT_FAILED_WITH_MESSAGE;
  115. }
  116. #ifdef ENABLE_ACLK
  117. extern int aclk_connected, aclk_kill_link, aclk_disable_runtime;
  118. #endif
  119. /* Change the claimed state of the agent.
  120. *
  121. * This only happens when the user has explicitly requested it:
  122. * - via the cli tool by reloading the claiming state
  123. * - after spawning the claim because of a command-line argument
  124. * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK
  125. */
  126. void load_claiming_state(void)
  127. {
  128. // --------------------------------------------------------------------
  129. // Check if the cloud is enabled
  130. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  131. netdata_cloud_enabled = false;
  132. #else
  133. uuid_t uuid;
  134. // Propagate into aclk and registry. Be kind of atomic...
  135. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  136. rrdhost_aclk_state_lock(localhost);
  137. if (localhost->aclk_state.claimed_id) {
  138. if (aclk_connected)
  139. localhost->aclk_state.prev_claimed_id = strdupz(localhost->aclk_state.claimed_id);
  140. freez(localhost->aclk_state.claimed_id);
  141. localhost->aclk_state.claimed_id = NULL;
  142. }
  143. if (aclk_connected)
  144. {
  145. netdata_log_info("Agent was already connected to Cloud - forcing reconnection under new credentials");
  146. aclk_kill_link = 1;
  147. }
  148. aclk_disable_runtime = 0;
  149. char filename[FILENAME_MAX + 1];
  150. snprintfz(filename, FILENAME_MAX, "%s/cloud.d/claimed_id", netdata_configured_varlib_dir);
  151. long bytes_read;
  152. char *claimed_id = read_by_filename(filename, &bytes_read);
  153. if(claimed_id && uuid_parse(claimed_id, uuid)) {
  154. netdata_log_error("claimed_id \"%s\" doesn't look like valid UUID", claimed_id);
  155. freez(claimed_id);
  156. claimed_id = NULL;
  157. }
  158. if(claimed_id) {
  159. localhost->aclk_state.claimed_id = mallocz(UUID_STR_LEN);
  160. uuid_unparse_lower(uuid, localhost->aclk_state.claimed_id);
  161. }
  162. invalidate_node_instances(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  163. metaqueue_store_claim_id(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  164. rrdhost_aclk_state_unlock(localhost);
  165. if (!claimed_id) {
  166. netdata_log_info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename);
  167. return;
  168. }
  169. freez(claimed_id);
  170. netdata_log_info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename);
  171. netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
  172. #endif
  173. }
  174. struct config cloud_config = { .first_section = NULL,
  175. .last_section = NULL,
  176. .mutex = NETDATA_MUTEX_INITIALIZER,
  177. .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
  178. .rwlock = AVL_LOCK_INITIALIZER } };
  179. void load_cloud_conf(int silent)
  180. {
  181. char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
  182. if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1))
  183. netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
  184. char *filename;
  185. errno = 0;
  186. int ret = 0;
  187. filename = strdupz_path_subpath(netdata_configured_varlib_dir, "cloud.d/cloud.conf");
  188. ret = appconfig_load(&cloud_config, filename, 1, NULL);
  189. if(!ret && !silent)
  190. netdata_log_info("CONFIG: cannot load cloud config '%s'. Running with internal defaults.", filename);
  191. freez(filename);
  192. // --------------------------------------------------------------------
  193. // Check if the cloud is enabled
  194. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  195. netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
  196. #else
  197. netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
  198. #endif
  199. // This must be set before any point in the code that accesses it. Do not move it from this function.
  200. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  201. }
  202. static char *netdata_random_session_id_filename = NULL;
  203. static uuid_t netdata_random_session_id = { 0 };
  204. bool netdata_random_session_id_generate(void) {
  205. static char guid[UUID_STR_LEN] = "";
  206. uuid_generate_random(netdata_random_session_id);
  207. uuid_unparse_lower(netdata_random_session_id, guid);
  208. char filename[FILENAME_MAX + 1];
  209. snprintfz(filename, FILENAME_MAX, "%s/netdata_random_session_id", netdata_configured_varlib_dir);
  210. bool ret = true;
  211. (void)unlink(filename);
  212. // save it
  213. int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 640);
  214. if(fd == -1) {
  215. netdata_log_error("Cannot create random session id file '%s'.", filename);
  216. ret = false;
  217. }
  218. else {
  219. if (write(fd, guid, UUID_STR_LEN - 1) != UUID_STR_LEN - 1) {
  220. netdata_log_error("Cannot write the random session id file '%s'.", filename);
  221. ret = false;
  222. } else {
  223. ssize_t bytes = write(fd, "\n", 1);
  224. UNUSED(bytes);
  225. }
  226. close(fd);
  227. }
  228. if(ret && (!netdata_random_session_id_filename || strcmp(netdata_random_session_id_filename, filename) != 0)) {
  229. freez(netdata_random_session_id_filename);
  230. netdata_random_session_id_filename = strdupz(filename);
  231. }
  232. return ret;
  233. }
  234. const char *netdata_random_session_id_get_filename(void) {
  235. if(!netdata_random_session_id_filename)
  236. netdata_random_session_id_generate();
  237. return netdata_random_session_id_filename;
  238. }
  239. bool netdata_random_session_id_matches(const char *guid) {
  240. if(uuid_is_null(netdata_random_session_id))
  241. return false;
  242. uuid_t uuid;
  243. if(uuid_parse(guid, uuid))
  244. return false;
  245. if(uuid_compare(netdata_random_session_id, uuid) == 0)
  246. return true;
  247. return false;
  248. }
  249. static bool check_claim_param(const char *s) {
  250. if(!s || !*s) return true;
  251. do {
  252. if(isalnum(*s) || *s == '.' || *s == ',' || *s == '-' || *s == ':' || *s == '/' || *s == '_')
  253. ;
  254. else
  255. return false;
  256. } while(*++s);
  257. return true;
  258. }
  259. void claim_reload_all(void) {
  260. error_log_limit_unlimited();
  261. load_claiming_state();
  262. registry_update_cloud_base_url();
  263. rrdpush_send_claimed_id(localhost);
  264. error_log_limit_reset();
  265. }
  266. int api_v2_claim(struct web_client *w, char *url) {
  267. char *key = NULL;
  268. char *token = NULL;
  269. char *rooms = NULL;
  270. char *base_url = NULL;
  271. while (url) {
  272. char *value = strsep_skip_consecutive_separators(&url, "&");
  273. if (!value || !*value) continue;
  274. char *name = strsep_skip_consecutive_separators(&value, "=");
  275. if (!name || !*name) continue;
  276. if (!value || !*value) continue;
  277. if(!strcmp(name, "key"))
  278. key = value;
  279. else if(!strcmp(name, "token"))
  280. token = value;
  281. else if(!strcmp(name, "rooms"))
  282. rooms = value;
  283. else if(!strcmp(name, "url"))
  284. base_url = value;
  285. }
  286. BUFFER *wb = w->response.data;
  287. buffer_flush(wb);
  288. buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
  289. time_t now_s = now_realtime_sec();
  290. CLOUD_STATUS status = buffer_json_cloud_status(wb, now_s);
  291. bool can_be_claimed = false;
  292. switch(status) {
  293. case CLOUD_STATUS_AVAILABLE:
  294. case CLOUD_STATUS_DISABLED:
  295. case CLOUD_STATUS_OFFLINE:
  296. can_be_claimed = true;
  297. break;
  298. case CLOUD_STATUS_UNAVAILABLE:
  299. case CLOUD_STATUS_BANNED:
  300. case CLOUD_STATUS_ONLINE:
  301. can_be_claimed = false;
  302. break;
  303. }
  304. buffer_json_member_add_boolean(wb, "can_be_claimed", can_be_claimed);
  305. if(can_be_claimed && key) {
  306. if(!netdata_random_session_id_matches(key)) {
  307. buffer_reset(wb);
  308. buffer_strcat(wb, "invalid key");
  309. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  310. return HTTP_RESP_FORBIDDEN;
  311. }
  312. if(!token || !base_url || !check_claim_param(token) || !check_claim_param(base_url) || (rooms && !check_claim_param(rooms))) {
  313. buffer_reset(wb);
  314. buffer_strcat(wb, "invalid parameters");
  315. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  316. return HTTP_RESP_BAD_REQUEST;
  317. }
  318. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  319. netdata_cloud_enabled = CONFIG_BOOLEAN_AUTO;
  320. appconfig_set_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", CONFIG_BOOLEAN_AUTO);
  321. appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", base_url);
  322. uuid_t claimed_id;
  323. uuid_generate_random(claimed_id);
  324. char claimed_id_str[UUID_STR_LEN];
  325. uuid_unparse_lower(claimed_id, claimed_id_str);
  326. BUFFER *t = buffer_create(1024, NULL);
  327. if(rooms)
  328. buffer_sprintf(t, "-id=%s -token=%s -rooms=%s", claimed_id_str, token, rooms);
  329. else
  330. buffer_sprintf(t, "-id=%s -token=%s", claimed_id_str, token);
  331. bool success = false;
  332. const char *msg = NULL;
  333. CLAIM_AGENT_RESPONSE rc = claim_agent(buffer_tostring(t), true, &msg);
  334. switch(rc) {
  335. case CLAIM_AGENT_OK:
  336. msg = "ok";
  337. success = true;
  338. can_be_claimed = false;
  339. claim_reload_all();
  340. {
  341. int ms = 0;
  342. do {
  343. status = cloud_status();
  344. if (status == CLOUD_STATUS_ONLINE && __atomic_load_n(&localhost->node_id, __ATOMIC_RELAXED))
  345. break;
  346. sleep_usec(50 * USEC_PER_MS);
  347. ms += 50;
  348. } while (ms < 10000);
  349. }
  350. break;
  351. case CLAIM_AGENT_NO_CLOUD_URL:
  352. msg = "No Netdata Cloud URL.";
  353. break;
  354. case CLAIM_AGENT_CLAIM_SCRIPT_FAILED:
  355. msg = "Claiming script failed.";
  356. break;
  357. case CLAIM_AGENT_CLOUD_DISABLED:
  358. msg = "Netdata Cloud is disabled on this agent.";
  359. break;
  360. case CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT:
  361. msg = "Failed to execute claiming script.";
  362. break;
  363. case CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE:
  364. msg = "Claiming script returned invalid code.";
  365. break;
  366. default:
  367. case CLAIM_AGENT_FAILED_WITH_MESSAGE:
  368. if(!msg)
  369. msg = "Unknown error";
  370. break;
  371. }
  372. // our status may have changed
  373. // refresh the status in our output
  374. buffer_flush(wb);
  375. buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
  376. now_s = now_realtime_sec();
  377. buffer_json_cloud_status(wb, now_s);
  378. // and this is the status of the claiming command we run
  379. buffer_json_member_add_boolean(wb, "success", success);
  380. buffer_json_member_add_string(wb, "message", msg);
  381. }
  382. if(can_be_claimed)
  383. buffer_json_member_add_string(wb, "key_filename", netdata_random_session_id_get_filename());
  384. buffer_json_agents_v2(wb, NULL, now_s, false, false);
  385. buffer_json_finalize(wb);
  386. return HTTP_RESP_OK;
  387. }