claim.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "claim.h"
  3. #include "registry/registry_internals.h"
  4. #include "aclk/aclk.h"
  5. #include "aclk/aclk_proxy.h"
  /* Pending claiming arguments; starts out NULL and is not touched by the code visible in this file. */
  char *claiming_pending_arguments = NULL;

  /*
   * Human readable messages for the exit codes of the claiming script.
   * The array index is the exit code; claim_agent() uses the array size to
   * decide whether an exit code is known, so keep the table dense.
   * The entries are string literals, hence fully const-qualified.
   */
  static const char *const claiming_errors[] = {
      [0]  = "Agent claimed successfully",
      [1]  = "Unknown argument",
      [2]  = "Problems with claiming working directory",
      [3]  = "Missing dependencies",
      [4]  = "Failure to connect to endpoint",
      [5]  = "The CLI didn't work",
      [6]  = "Wrong user",
      [7]  = "Unknown HTTP error message",
      [8]  = "invalid node id",
      [9]  = "invalid node name",
      [10] = "invalid room id",
      [11] = "invalid public key",
      [12] = "token expired/token not found/invalid token",
      [13] = "already claimed",
      [14] = "processing claiming",
      [15] = "Internal Server Error",
      [16] = "Gateway Timeout",
      [17] = "Service Unavailable",
      [18] = "Agent Unique Id Not Readable",
  };
  28. /* Retrieve the claim id for the agent.
  29. * Caller owns the string.
  30. */
  31. char *get_agent_claimid()
  32. {
  33. char *result;
  34. rrdhost_aclk_state_lock(localhost);
  35. result = (localhost->aclk_state.claimed_id == NULL) ? NULL : strdupz(localhost->aclk_state.claimed_id);
  36. rrdhost_aclk_state_unlock(localhost);
  37. return result;
  38. }
  39. #define CLAIMING_COMMAND_LENGTH 16384
  40. #define CLAIMING_PROXY_LENGTH (CLAIMING_COMMAND_LENGTH/4)
  41. /* rrd_init() and post_conf_load() must have been called before this function */
  42. CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, const char **msg __maybe_unused)
  43. {
  44. if (!force || !netdata_cloud_enabled) {
  45. netdata_log_error("Refusing to claim agent -> cloud functionality has been disabled");
  46. return CLAIM_AGENT_CLOUD_DISABLED;
  47. }
  48. #ifndef DISABLE_CLOUD
  49. int exit_code;
  50. pid_t command_pid;
  51. char command_exec_buffer[CLAIMING_COMMAND_LENGTH + 1];
  52. char command_line_buffer[CLAIMING_COMMAND_LENGTH + 1];
  53. FILE *fp_child_output, *fp_child_input;
  54. // This is guaranteed to be set early in main via post_conf_load()
  55. char *cloud_base_url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", NULL);
  56. if (cloud_base_url == NULL) {
  57. internal_fatal(true, "Do not move the cloud base url out of post_conf_load!!");
  58. return CLAIM_AGENT_NO_CLOUD_URL;
  59. }
  60. const char *proxy_str;
  61. ACLK_PROXY_TYPE proxy_type;
  62. char proxy_flag[CLAIMING_PROXY_LENGTH] = "-noproxy";
  63. proxy_str = aclk_get_proxy(&proxy_type);
  64. if (proxy_type == PROXY_TYPE_SOCKS5 || proxy_type == PROXY_TYPE_HTTP)
  65. snprintf(proxy_flag, CLAIMING_PROXY_LENGTH, "-proxy=\"%s\"", proxy_str);
  66. snprintfz(command_exec_buffer, CLAIMING_COMMAND_LENGTH,
  67. "exec \"%s%snetdata-claim.sh\"",
  68. netdata_exe_path[0] ? netdata_exe_path : "",
  69. netdata_exe_path[0] ? "/" : ""
  70. );
  71. snprintfz(command_line_buffer,
  72. CLAIMING_COMMAND_LENGTH,
  73. "%s %s -hostname=%s -id=%s -url=%s -noreload %s",
  74. command_exec_buffer,
  75. proxy_flag,
  76. netdata_configured_hostname,
  77. localhost->machine_guid,
  78. cloud_base_url,
  79. claiming_arguments);
  80. netdata_log_info("Executing agent claiming command: %s", command_exec_buffer);
  81. fp_child_output = netdata_popen(command_line_buffer, &command_pid, &fp_child_input);
  82. if(!fp_child_output) {
  83. netdata_log_error("Cannot popen(\"%s\").", command_exec_buffer);
  84. return CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT;
  85. }
  86. netdata_log_info("Waiting for claiming command '%s' to finish.", command_exec_buffer);
  87. char read_buffer[100 + 1];
  88. while (fgets(read_buffer, 100, fp_child_output) != NULL) ;
  89. exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
  90. netdata_log_info("Agent claiming command '%s' returned with code %d", command_exec_buffer, exit_code);
  91. if (0 == exit_code) {
  92. load_claiming_state();
  93. return CLAIM_AGENT_OK;
  94. }
  95. if (exit_code < 0) {
  96. netdata_log_error("Agent claiming command '%s' failed to complete its run", command_exec_buffer);
  97. return CLAIM_AGENT_CLAIM_SCRIPT_FAILED;
  98. }
  99. errno = 0;
  100. unsigned maximum_known_exit_code = sizeof(claiming_errors) / sizeof(claiming_errors[0]) - 1;
  101. if ((unsigned)exit_code > maximum_known_exit_code) {
  102. netdata_log_error("Agent failed to be claimed with an unknown error. Cmd: '%s'", command_exec_buffer);
  103. return CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE;
  104. }
  105. netdata_log_error("Agent failed to be claimed using the command '%s' with the following error message:",
  106. command_exec_buffer);
  107. netdata_log_error("\"%s\"", claiming_errors[exit_code]);
  108. if(msg) *msg = claiming_errors[exit_code];
  109. #else
  110. UNUSED(claiming_arguments);
  111. UNUSED(claiming_errors);
  112. #endif
  113. return CLAIM_AGENT_FAILED_WITH_MESSAGE;
  114. }
  115. /* Change the claimed state of the agent.
  116. *
  117. * This only happens when the user has explicitly requested it:
  118. * - via the cli tool by reloading the claiming state
  119. * - after spawning the claim because of a command-line argument
  120. * If this happens with the ACLK active under an old claim then we MUST KILL THE LINK
  121. */
  122. void load_claiming_state(void)
  123. {
  124. // --------------------------------------------------------------------
  125. // Check if the cloud is enabled
  126. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  127. netdata_cloud_enabled = false;
  128. #else
  129. uuid_t uuid;
  130. // Propagate into aclk and registry. Be kind of atomic...
  131. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  132. rrdhost_aclk_state_lock(localhost);
  133. if (localhost->aclk_state.claimed_id) {
  134. if (aclk_connected)
  135. localhost->aclk_state.prev_claimed_id = strdupz(localhost->aclk_state.claimed_id);
  136. freez(localhost->aclk_state.claimed_id);
  137. localhost->aclk_state.claimed_id = NULL;
  138. }
  139. if (aclk_connected)
  140. {
  141. netdata_log_info("Agent was already connected to Cloud - forcing reconnection under new credentials");
  142. aclk_kill_link = 1;
  143. }
  144. aclk_disable_runtime = 0;
  145. char filename[FILENAME_MAX + 1];
  146. snprintfz(filename, FILENAME_MAX, "%s/cloud.d/claimed_id", netdata_configured_varlib_dir);
  147. long bytes_read;
  148. char *claimed_id = read_by_filename(filename, &bytes_read);
  149. if(claimed_id && uuid_parse(claimed_id, uuid)) {
  150. netdata_log_error("claimed_id \"%s\" doesn't look like valid UUID", claimed_id);
  151. freez(claimed_id);
  152. claimed_id = NULL;
  153. }
  154. if(claimed_id) {
  155. localhost->aclk_state.claimed_id = mallocz(UUID_STR_LEN);
  156. uuid_unparse_lower(uuid, localhost->aclk_state.claimed_id);
  157. }
  158. rrdhost_aclk_state_unlock(localhost);
  159. invalidate_node_instances(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  160. metaqueue_store_claim_id(&localhost->host_uuid, claimed_id ? &uuid : NULL);
  161. if (!claimed_id) {
  162. netdata_log_info("Unable to load '%s', setting state to AGENT_UNCLAIMED", filename);
  163. return;
  164. }
  165. freez(claimed_id);
  166. netdata_log_info("File '%s' was found. Setting state to AGENT_CLAIMED.", filename);
  167. netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
  168. #endif
  169. }
  170. struct config cloud_config = { .first_section = NULL,
  171. .last_section = NULL,
  172. .mutex = NETDATA_MUTEX_INITIALIZER,
  173. .index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
  174. .rwlock = AVL_LOCK_INITIALIZER } };
  175. void load_cloud_conf(int silent)
  176. {
  177. char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
  178. if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1))
  179. netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
  180. char *filename;
  181. errno = 0;
  182. int ret = 0;
  183. filename = strdupz_path_subpath(netdata_configured_varlib_dir, "cloud.d/cloud.conf");
  184. ret = appconfig_load(&cloud_config, filename, 1, NULL);
  185. if(!ret && !silent)
  186. netdata_log_info("CONFIG: cannot load cloud config '%s'. Running with internal defaults.", filename);
  187. freez(filename);
  188. // --------------------------------------------------------------------
  189. // Check if the cloud is enabled
  190. #if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
  191. netdata_cloud_enabled = CONFIG_BOOLEAN_NO;
  192. #else
  193. netdata_cloud_enabled = appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled);
  194. #endif
  195. // This must be set before any point in the code that accesses it. Do not move it from this function.
  196. appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
  197. }
  198. static char *netdata_random_session_id_filename = NULL;
  199. static uuid_t netdata_random_session_id = { 0 };
  200. bool netdata_random_session_id_generate(void) {
  201. static char guid[UUID_STR_LEN] = "";
  202. uuid_generate_random(netdata_random_session_id);
  203. uuid_unparse_lower(netdata_random_session_id, guid);
  204. char filename[FILENAME_MAX + 1];
  205. snprintfz(filename, FILENAME_MAX, "%s/netdata_random_session_id", netdata_configured_varlib_dir);
  206. bool ret = true;
  207. (void)unlink(filename);
  208. // save it
  209. int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 640);
  210. if(fd == -1) {
  211. netdata_log_error("Cannot create random session id file '%s'.", filename);
  212. ret = false;
  213. }
  214. else {
  215. if (write(fd, guid, UUID_STR_LEN - 1) != UUID_STR_LEN - 1) {
  216. netdata_log_error("Cannot write the random session id file '%s'.", filename);
  217. ret = false;
  218. } else {
  219. ssize_t bytes = write(fd, "\n", 1);
  220. UNUSED(bytes);
  221. }
  222. close(fd);
  223. }
  224. if(ret && (!netdata_random_session_id_filename || strcmp(netdata_random_session_id_filename, filename) != 0)) {
  225. freez(netdata_random_session_id_filename);
  226. netdata_random_session_id_filename = strdupz(filename);
  227. }
  228. return ret;
  229. }
  230. const char *netdata_random_session_id_get_filename(void) {
  231. if(!netdata_random_session_id_filename)
  232. netdata_random_session_id_generate();
  233. return netdata_random_session_id_filename;
  234. }
  235. bool netdata_random_session_id_matches(const char *guid) {
  236. if(uuid_is_null(netdata_random_session_id))
  237. return false;
  238. uuid_t uuid;
  239. if(uuid_parse(guid, uuid))
  240. return false;
  241. if(uuid_compare(netdata_random_session_id, uuid) == 0)
  242. return true;
  243. return false;
  244. }
  /*
   * Validate a claiming parameter received from the web API before it is
   * placed on the claiming script command line.
   *
   * Only alphanumerics and the characters . , - : / _ are accepted.
   * A NULL or empty string is accepted; callers that require a value check
   * for its presence themselves.
   *
   * Returns true when every character is allowed, false otherwise.
   */
  static bool check_claim_param(const char *s) {
      if(!s || !*s) return true;

      for(; *s; s++) {
          // <ctype.h> functions require a value representable as unsigned char;
          // passing a negative plain char is undefined behaviour.
          const unsigned char c = (unsigned char)*s;

          if(isalnum(c))
              continue;

          switch(c) {
              case '.':
              case ',':
              case '-':
              case ':':
              case '/':
              case '_':
                  continue;

              default:
                  return false;
          }
      }

      return true;
  }
  255. void claim_reload_all(void) {
  256. nd_log_limits_unlimited();
  257. load_claiming_state();
  258. registry_update_cloud_base_url();
  259. rrdpush_send_claimed_id(localhost);
  260. nd_log_limits_reset();
  261. }
  262. int api_v2_claim(struct web_client *w, char *url) {
  263. char *key = NULL;
  264. char *token = NULL;
  265. char *rooms = NULL;
  266. char *base_url = NULL;
  267. while (url) {
  268. char *value = strsep_skip_consecutive_separators(&url, "&");
  269. if (!value || !*value) continue;
  270. char *name = strsep_skip_consecutive_separators(&value, "=");
  271. if (!name || !*name) continue;
  272. if (!value || !*value) continue;
  273. if(!strcmp(name, "key"))
  274. key = value;
  275. else if(!strcmp(name, "token"))
  276. token = value;
  277. else if(!strcmp(name, "rooms"))
  278. rooms = value;
  279. else if(!strcmp(name, "url"))
  280. base_url = value;
  281. }
  282. BUFFER *wb = w->response.data;
  283. buffer_flush(wb);
  284. buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
  285. time_t now_s = now_realtime_sec();
  286. CLOUD_STATUS status = buffer_json_cloud_status(wb, now_s);
  287. bool can_be_claimed = false;
  288. switch(status) {
  289. case CLOUD_STATUS_AVAILABLE:
  290. case CLOUD_STATUS_DISABLED:
  291. case CLOUD_STATUS_OFFLINE:
  292. can_be_claimed = true;
  293. break;
  294. case CLOUD_STATUS_UNAVAILABLE:
  295. case CLOUD_STATUS_BANNED:
  296. case CLOUD_STATUS_ONLINE:
  297. can_be_claimed = false;
  298. break;
  299. }
  300. buffer_json_member_add_boolean(wb, "can_be_claimed", can_be_claimed);
  301. if(can_be_claimed && key) {
  302. if(!netdata_random_session_id_matches(key)) {
  303. buffer_reset(wb);
  304. buffer_strcat(wb, "invalid key");
  305. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  306. return HTTP_RESP_FORBIDDEN;
  307. }
  308. if(!token || !base_url || !check_claim_param(token) || !check_claim_param(base_url) || (rooms && !check_claim_param(rooms))) {
  309. buffer_reset(wb);
  310. buffer_strcat(wb, "invalid parameters");
  311. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  312. return HTTP_RESP_BAD_REQUEST;
  313. }
  314. netdata_random_session_id_generate(); // generate a new key, to avoid an attack to find it
  315. netdata_cloud_enabled = CONFIG_BOOLEAN_AUTO;
  316. appconfig_set_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", CONFIG_BOOLEAN_AUTO);
  317. appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", base_url);
  318. uuid_t claimed_id;
  319. uuid_generate_random(claimed_id);
  320. char claimed_id_str[UUID_STR_LEN];
  321. uuid_unparse_lower(claimed_id, claimed_id_str);
  322. BUFFER *t = buffer_create(1024, NULL);
  323. if(rooms)
  324. buffer_sprintf(t, "-id=%s -token=%s -rooms=%s", claimed_id_str, token, rooms);
  325. else
  326. buffer_sprintf(t, "-id=%s -token=%s", claimed_id_str, token);
  327. bool success = false;
  328. const char *msg = NULL;
  329. CLAIM_AGENT_RESPONSE rc = claim_agent(buffer_tostring(t), true, &msg);
  330. switch(rc) {
  331. case CLAIM_AGENT_OK:
  332. msg = "ok";
  333. success = true;
  334. can_be_claimed = false;
  335. claim_reload_all();
  336. {
  337. int ms = 0;
  338. do {
  339. status = cloud_status();
  340. if (status == CLOUD_STATUS_ONLINE && __atomic_load_n(&localhost->node_id, __ATOMIC_RELAXED))
  341. break;
  342. sleep_usec(50 * USEC_PER_MS);
  343. ms += 50;
  344. } while (ms < 10000);
  345. }
  346. break;
  347. case CLAIM_AGENT_NO_CLOUD_URL:
  348. msg = "No Netdata Cloud URL.";
  349. break;
  350. case CLAIM_AGENT_CLAIM_SCRIPT_FAILED:
  351. msg = "Claiming script failed.";
  352. break;
  353. case CLAIM_AGENT_CLOUD_DISABLED:
  354. msg = "Netdata Cloud is disabled on this agent.";
  355. break;
  356. case CLAIM_AGENT_CANNOT_EXECUTE_CLAIM_SCRIPT:
  357. msg = "Failed to execute claiming script.";
  358. break;
  359. case CLAIM_AGENT_CLAIM_SCRIPT_RETURNED_INVALID_CODE:
  360. msg = "Claiming script returned invalid code.";
  361. break;
  362. default:
  363. case CLAIM_AGENT_FAILED_WITH_MESSAGE:
  364. if(!msg)
  365. msg = "Unknown error";
  366. break;
  367. }
  368. // our status may have changed
  369. // refresh the status in our output
  370. buffer_flush(wb);
  371. buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
  372. now_s = now_realtime_sec();
  373. buffer_json_cloud_status(wb, now_s);
  374. // and this is the status of the claiming command we run
  375. buffer_json_member_add_boolean(wb, "success", success);
  376. buffer_json_member_add_string(wb, "message", msg);
  377. }
  378. if(can_be_claimed)
  379. buffer_json_member_add_string(wb, "key_filename", netdata_random_session_id_get_filename());
  380. buffer_json_agents_v2(wb, NULL, now_s, false, false);
  381. buffer_json_finalize(wb);
  382. return HTTP_RESP_OK;
  383. }