aclk_query.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "aclk_query.h"
  3. #include "aclk_stats.h"
  4. #include "aclk_tx_msgs.h"
// Prefix used when naming the query worker threads ("<prefix>_<index>").
#define ACLK_QUERY_THREAD_NAME "ACLK_Query"
// HTTP header name searched for in the request payload when deciding
// whether the response may be gzip compressed.
#define WEB_HDR_ACCEPT_ENC "Accept-Encoding:"
// Condition variable the query threads block on between queue drains.
// NOTE(review): not static - presumably signalled by the code that enqueues
// queries elsewhere; confirm.
pthread_cond_t query_cond_wait = PTHREAD_COND_INITIALIZER;
// Mutex paired with query_cond_wait.
pthread_mutex_t query_lock_wait = PTHREAD_MUTEX_INITIALIZER;
// Shorthand for taking / releasing query_lock_wait.
#define QUERY_THREAD_LOCK pthread_mutex_lock(&query_lock_wait)
#define QUERY_THREAD_UNLOCK pthread_mutex_unlock(&query_lock_wait)
// One entry of the query dispatch table: maps a query type to the function
// that processes queries of that type.
typedef struct aclk_query_handler {
    aclk_query_type_t type; // query type this entry handles
    char *name; // for logging purposes
    // handler invoked with the worker thread context and the query to process
    int(*fnc)(struct aclk_query_thread *query_thr, aclk_query_t query);
} aclk_query_handler;
  16. static int info_metadata(struct aclk_query_thread *query_thr, aclk_query_t query)
  17. {
  18. aclk_send_info_metadata(query_thr->client,
  19. !query->data.metadata_info.initial_on_connect,
  20. query->data.metadata_info.host);
  21. return 0;
  22. }
  23. static int alarms_metadata(struct aclk_query_thread *query_thr, aclk_query_t query)
  24. {
  25. aclk_send_alarm_metadata(query_thr->client,
  26. !query->data.metadata_info.initial_on_connect);
  27. return 0;
  28. }
  29. static usec_t aclk_web_api_v1_request(RRDHOST *host, struct web_client *w, char *url)
  30. {
  31. usec_t t;
  32. t = now_monotonic_high_precision_usec();
  33. w->response.code = web_client_api_request_v1(host, w, url);
  34. t = now_monotonic_high_precision_usec() - t;
  35. if (aclk_stats_enabled) {
  36. ACLK_STATS_LOCK;
  37. aclk_metrics_per_sample.cloud_q_process_total += t;
  38. aclk_metrics_per_sample.cloud_q_process_count++;
  39. if (aclk_metrics_per_sample.cloud_q_process_max < t)
  40. aclk_metrics_per_sample.cloud_q_process_max = t;
  41. ACLK_STATS_UNLOCK;
  42. }
  43. return t;
  44. }
  45. static RRDHOST *node_id_2_rrdhost(const char *node_id)
  46. {
  47. int res;
  48. uuid_t node_id_bin, host_id_bin;
  49. rrd_rdlock();
  50. RRDHOST *host = find_host_by_node_id((char *) node_id);
  51. rrd_unlock();
  52. if (host)
  53. return host;
  54. char host_id[UUID_STR_LEN];
  55. if (uuid_parse(node_id, node_id_bin)) {
  56. error("Couldn't parse UUID %s", node_id);
  57. return NULL;
  58. }
  59. if ((res = get_host_id(&node_id_bin, &host_id_bin))) {
  60. error("node not found rc=%d", res);
  61. return NULL;
  62. }
  63. uuid_unparse_lower(host_id_bin, host_id);
  64. return rrdhost_find_by_guid(host_id, 0);
  65. }
// Query path prefix that selects a specific node: "/node/<node id>/...".
#define NODE_ID_QUERY "/node/"
// TODO this function should be quarantined and written nicely
// lots of skeletons from initial ACLK Legacy impl.
// quick and dirty from the start
/*
 * Handler for HTTP_API_V2 queries.
 *
 * Builds a throw-away web_client, resolves the target host (localhost, or
 * the node named after the "/node/" prefix), runs the request through the
 * v1 web API, optionally gzip-compresses the body, prepends the generated
 * HTTP header and sends the result with aclk_http_msg_v2(). An access log
 * line is written on every path, including errors.
 *
 * Note: the query string in query->data.http_api_v2.query is modified in
 * place (it is cut at the first '?').
 *
 * Returns 0 on success and 1 when an error response was sent instead
 * (malformed node id, unknown node, zlib failure).
 */
static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
{
    int retval = 0;
    usec_t t;
    BUFFER *local_buffer = NULL;
    // copy of the original query, kept for the access log because the
    // query string itself is truncated further down
    BUFFER *log_buffer = buffer_create(NETDATA_WEB_REQUEST_URL_SIZE);
    RRDHOST *query_host = localhost;
#ifdef NETDATA_WITH_ZLIB
    int z_ret;
    // accumulates the compressed body; ownership moves to w->response.data
    // once compression succeeds
    BUFFER *z_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
    char *start, *end;
#endif
    // zero-initialised client, so e.g. response.zinitialized starts as 0
    struct web_client *w = (struct web_client *)callocz(1, sizeof(struct web_client));
    w->response.data = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
    w->response.header = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE);
    w->response.header_output = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE);
    strcpy(w->origin, "*"); // Simulate web_client_create_on_fd()
    w->cookie1[0] = 0; // Simulate web_client_create_on_fd()
    w->cookie2[0] = 0; // Simulate web_client_create_on_fd()
    w->acl = 0x1f;
    buffer_strcat(log_buffer, query->data.http_api_v2.query);
    // size = bytes produced by the API call, sent = bytes actually sent
    // (differs from size only when the body gets compressed)
    size_t size = 0;
    size_t sent = 0;
    w->tv_in = query->created_tv;
    now_realtime_timeval(&w->tv_ready);
    // queries starting with "/node/" address a specific node
    if (!strncmp(query->data.http_api_v2.query, NODE_ID_QUERY, strlen(NODE_ID_QUERY))) {
        char *node_uuid = query->data.http_api_v2.query + strlen(NODE_ID_QUERY);
        char nodeid[UUID_STR_LEN];
        // what follows the prefix must be at least as long as a textual UUID
        if (strlen(node_uuid) < (UUID_STR_LEN - 1)) {
            error_report(CLOUD_EMSG_MALFORMED_NODE_ID);
            retval = 1;
            w->response.code = 404;
            aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_MALFORMED_NODE_ID, CLOUD_EMSG_MALFORMED_NODE_ID, NULL, 0);
            goto cleanup;
        }
        // keep only the UUID-length part; the rest of the path follows it
        strncpyz(nodeid, node_uuid, UUID_STR_LEN - 1);
        query_host = node_id_2_rrdhost(nodeid);
        if (!query_host) {
            error_report("Host with node_id \"%s\" not found! Returning 404 to Cloud!", nodeid);
            retval = 1;
            w->response.code = 404;
            aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_NODE_NOT_FOUND, CLOUD_EMSG_NODE_NOT_FOUND, NULL, 0);
            goto cleanup;
        }
    }
    // split path and query string at the first '?': the part from '?' on is
    // URL-decoded into the client, then the path is terminated there
    char *mysep = strchr(query->data.http_api_v2.query, '?');
    if (mysep) {
        url_decode_r(w->decoded_query_string, mysep, NETDATA_WEB_REQUEST_URL_SIZE + 1);
        *mysep = '\0';
    } else
        url_decode_r(w->decoded_query_string, query->data.http_api_v2.query, NETDATA_WEB_REQUEST_URL_SIZE + 1);
    // from here on mysep points at the last '/' of the (now truncated) path;
    // the text after it is the API endpoint name
    mysep = strrchr(query->data.http_api_v2.query, '/');
    if (aclk_stats_enabled) {
        ACLK_STATS_LOCK;
        int stat_idx = aclk_cloud_req_http_type_to_idx(mysep ? mysep + 1 : "other");
        aclk_metrics_per_sample.cloud_req_http_by_type[stat_idx]++;
        ACLK_STATS_UNLOCK;
    }
    // execute the query
    t = aclk_web_api_v1_request(query_host, w, mysep ? mysep + 1 : "noop");
    size = (w->mode == WEB_CLIENT_MODE_FILECOPY) ? w->response.rlen : w->response.data->len;
    sent = size;
#ifdef NETDATA_WITH_ZLIB
    // check if gzip encoding can and should be used
    // NOTE(review): payload is handed to strstr() without a NULL check -
    // assumes HTTP_API_V2 queries always carry a payload; confirm.
    if ((start = strstr((char *)query->data.http_api_v2.payload, WEB_HDR_ACCEPT_ENC))) {
        start += strlen(WEB_HDR_ACCEPT_ENC);
        // "gzip" only counts if it appears before the end of the header line
        end = strstr(start, "\x0D\x0A");
        start = strstr(start, "gzip");
        if (start && start < end) {
            w->response.zstream.zalloc = Z_NULL;
            w->response.zstream.zfree = Z_NULL;
            w->response.zstream.opaque = Z_NULL;
            if(deflateInit2(&w->response.zstream, web_gzip_level, Z_DEFLATED, 15 + 16, 8, web_gzip_strategy) == Z_OK) {
                w->response.zinitialized = 1;
                w->response.zoutput = 1;
            } else
                error("Failed to initialize zlib. Proceeding without compression.");
        }
    }
    if (w->response.data->len && w->response.zinitialized) {
        w->response.zstream.next_in = (Bytef *)w->response.data->buffer;
        w->response.zstream.avail_in = w->response.data->len;
        // compress the whole body chunk by chunk, appending each chunk of
        // output to z_buffer until zlib reports the end of the stream
        do {
            w->response.zstream.avail_out = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE;
            w->response.zstream.next_out = w->response.zbuffer;
            z_ret = deflate(&w->response.zstream, Z_FINISH);
            if(z_ret < 0) {
                if(w->response.zstream.msg)
                    error("Error compressing body. ZLIB error: \"%s\"", w->response.zstream.msg);
                else
                    error("Unknown error during zlib compression.");
                retval = 1;
                w->response.code = 500;
                aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_ZLIB_ERROR, CLOUD_EMSG_ZLIB_ERROR, NULL, 0);
                goto cleanup;
            }
            int bytes_to_cpy = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE - w->response.zstream.avail_out;
            buffer_need_bytes(z_buffer, bytes_to_cpy);
            memcpy(&z_buffer->buffer[z_buffer->len], w->response.zbuffer, bytes_to_cpy);
            z_buffer->len += bytes_to_cpy;
        } while(z_ret != Z_STREAM_END);
        // so that web_client_build_http_header
        // puts correct content length into header
        buffer_free(w->response.data);
        w->response.data = z_buffer;
        // ownership moved to w->response.data; cleared so cleanup frees it once
        z_buffer = NULL;
    }
#endif
    w->response.data->date = w->tv_ready.tv_sec;
    web_client_build_http_header(w);
    // assemble the outgoing message: generated HTTP header followed by body
    local_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
    local_buffer->contenttype = CT_APPLICATION_JSON;
    buffer_strcat(local_buffer, w->response.header_output->buffer);
    if (w->response.data->len) {
#ifdef NETDATA_WITH_ZLIB
        if (w->response.zinitialized) {
            // compressed body is binary: copy by length, not as a C string
            buffer_need_bytes(local_buffer, w->response.data->len);
            memcpy(&local_buffer->buffer[local_buffer->len], w->response.data->buffer, w->response.data->len);
            local_buffer->len += w->response.data->len;
            sent = sent - size + w->response.data->len;
        } else {
#endif
            buffer_strcat(local_buffer, w->response.data->buffer);
#ifdef NETDATA_WITH_ZLIB
        }
#endif
    }
    // send msg.
    aclk_http_msg_v2(query_thr->client, query->callback_topic, query->msg_id, t, query->created, w->response.code, local_buffer->buffer, local_buffer->len);
    // declared before the label; the error paths jump past this declaration
    // and tv is assigned right after the label before any use
    struct timeval tv;
cleanup:
    now_realtime_timeval(&tv);
    // access log entry: sizes, compression ratio and timings of this request
    log_access("%llu: %d '[ACLK]:%d' '%s' (sent/all = %zu/%zu bytes %0.0f%%, prep/sent/total = %0.2f/%0.2f/%0.2f ms) %d '%s'",
        w->id
        , gettid()
        , query_thr->idx
        , "DATA"
        , sent
        , size
        , size > sent ? -(((size - sent) / (double)size) * 100.0) : ((size > 0) ? (((sent - size ) / (double)size) * 100.0) : 0.0)
        , dt_usec(&w->tv_ready, &w->tv_in) / 1000.0
        , dt_usec(&tv, &w->tv_ready) / 1000.0
        , dt_usec(&tv, &w->tv_in) / 1000.0
        , w->response.code
        , strip_control_characters((char *)buffer_tostring(log_buffer))
    );
#ifdef NETDATA_WITH_ZLIB
    if(w->response.zinitialized)
        deflateEnd(&w->response.zstream);
    // z_buffer is NULL here if it was handed over to w->response.data
    buffer_free(z_buffer);
#endif
    buffer_free(w->response.data);
    buffer_free(w->response.header);
    buffer_free(w->response.header_output);
    freez(w);
    // local_buffer is still NULL when an error path jumped here early
    buffer_free(local_buffer);
    buffer_free(log_buffer);
    return retval;
}
  229. static int chart_query(struct aclk_query_thread *query_thr, aclk_query_t query)
  230. {
  231. aclk_chart_msg(query_thr->client, query->data.chart_add_del.host, query->data.chart_add_del.chart_name);
  232. return 0;
  233. }
  234. static int alarm_state_update_query(struct aclk_query_thread *query_thr, aclk_query_t query)
  235. {
  236. aclk_alarm_state_msg(query_thr->client, query->data.alarm_update);
  237. // aclk_alarm_state_msg frees the json object including the header it generates
  238. query->data.alarm_update = NULL;
  239. return 0;
  240. }
  241. #ifdef ENABLE_NEW_CLOUD_PROTOCOL
  242. static int register_node(struct aclk_query_thread *query_thr, aclk_query_t query) {
  243. // TODO create a pending registrations list
  244. // with some timeouts to detect registration requests that
  245. // go unanswered from the cloud
  246. aclk_generate_node_registration(query_thr->client, &query->data.node_creation);
  247. return 0;
  248. }
  249. static int node_state_update(struct aclk_query_thread *query_thr, aclk_query_t query) {
  250. // TODO create a pending registrations list
  251. // with some timeouts to detect registration requests that
  252. // go unanswered from the cloud
  253. aclk_generate_node_state_update(query_thr->client, &query->data.node_update);
  254. return 0;
  255. }
  256. static int send_bin_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
  257. {
  258. // this will be simplified when legacy support is removed
  259. aclk_send_bin_message_subtopic_pid(query_thr->client, query->data.bin_payload.payload, query->data.bin_payload.size, query->data.bin_payload.topic, query->data.bin_payload.msg_name);
  260. return 0;
  261. }
  262. #endif
// Dispatch table mapping each query type to its display name and handler.
// The table is terminated by the UNKNOWN entry; lookups scan it linearly
// until they reach that sentinel.
// NOTE(review): CHART_DEL is dispatched to info_metadata (not chart_query),
// so that handler reads query->data.metadata_info for CHART_DEL queries -
// confirm the code enqueueing CHART_DEL fills that member.
aclk_query_handler aclk_query_handlers[] = {
    { .type = HTTP_API_V2, .name = "http_api_request_v2", .fnc = http_api_v2 },
    { .type = ALARM_STATE_UPDATE, .name = "alarm_state_update", .fnc = alarm_state_update_query },
    { .type = METADATA_INFO, .name = "info_metadata", .fnc = info_metadata },
    { .type = METADATA_ALARMS, .name = "alarms_metadata", .fnc = alarms_metadata },
    { .type = CHART_NEW, .name = "chart_new", .fnc = chart_query },
    { .type = CHART_DEL, .name = "chart_delete", .fnc = info_metadata },
#ifdef ENABLE_NEW_CLOUD_PROTOCOL
    // all of the binary-payload message types share the send_bin_msg handler
    { .type = REGISTER_NODE, .name = "register_node", .fnc = register_node },
    { .type = NODE_STATE_UPDATE, .name = "node_state_update", .fnc = node_state_update },
    { .type = CHART_DIMS_UPDATE, .name = "chart_and_dim_update", .fnc = send_bin_msg },
    { .type = CHART_CONFIG_UPDATED, .name = "chart_config_updated", .fnc = send_bin_msg },
    { .type = CHART_RESET, .name = "reset_chart_messages", .fnc = send_bin_msg },
    { .type = RETENTION_UPDATED, .name = "update_retention_info", .fnc = send_bin_msg },
    { .type = UPDATE_NODE_INFO, .name = "update_node_info", .fnc = send_bin_msg },
    { .type = ALARM_LOG_HEALTH, .name = "alarm_log_health", .fnc = send_bin_msg },
    { .type = ALARM_PROVIDE_CFG, .name = "provide_alarm_config", .fnc = send_bin_msg },
    { .type = ALARM_SNAPSHOT, .name = "alarm_snapshot", .fnc = send_bin_msg },
#endif
    // sentinel - must stay last
    { .type = UNKNOWN, .name = NULL, .fnc = NULL }
};
  284. const char *aclk_query_get_name(aclk_query_type_t qt)
  285. {
  286. aclk_query_handler *ptr = aclk_query_handlers;
  287. while (ptr->type != UNKNOWN) {
  288. if (ptr->type == qt)
  289. return ptr->name;
  290. ptr++;
  291. }
  292. return "unknown";
  293. }
  294. static void aclk_query_process_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
  295. {
  296. for (int i = 0; aclk_query_handlers[i].type != UNKNOWN; i++) {
  297. if (aclk_query_handlers[i].type == query->type) {
  298. debug(D_ACLK, "Processing Queued Message of type: \"%s\"", aclk_query_handlers[i].name);
  299. aclk_query_handlers[i].fnc(query_thr, query);
  300. if (aclk_stats_enabled) {
  301. ACLK_STATS_LOCK;
  302. aclk_metrics_per_sample.queries_dispatched++;
  303. aclk_queries_per_thread[query_thr->idx]++;
  304. aclk_metrics_per_sample.queries_per_type[query->type]++;
  305. ACLK_STATS_UNLOCK;
  306. }
  307. aclk_query_free(query);
  308. return;
  309. }
  310. }
  311. fatal("Unknown query in query queue. %u", query->type);
  312. }
  313. /* Processes messages from queue. Compete for work with other threads
  314. */
  315. int aclk_query_process_msgs(struct aclk_query_thread *query_thr)
  316. {
  317. aclk_query_t query;
  318. while ((query = aclk_queue_pop()))
  319. aclk_query_process_msg(query_thr, query);
  320. return 0;
  321. }
  322. /**
  323. * Main query processing thread
  324. */
  325. void *aclk_query_main_thread(void *ptr)
  326. {
  327. struct aclk_query_thread *query_thr = ptr;
  328. while (!netdata_exit) {
  329. aclk_query_process_msgs(query_thr);
  330. QUERY_THREAD_LOCK;
  331. if (unlikely(pthread_cond_wait(&query_cond_wait, &query_lock_wait)))
  332. sleep_usec(USEC_PER_SEC * 1);
  333. QUERY_THREAD_UNLOCK;
  334. }
  335. return NULL;
  336. }
  337. #define TASK_LEN_MAX 22
  338. void aclk_query_threads_start(struct aclk_query_threads *query_threads, mqtt_wss_client client)
  339. {
  340. info("Starting %d query threads.", query_threads->count);
  341. char thread_name[TASK_LEN_MAX];
  342. query_threads->thread_list = callocz(query_threads->count, sizeof(struct aclk_query_thread));
  343. for (int i = 0; i < query_threads->count; i++) {
  344. query_threads->thread_list[i].idx = i; //thread needs to know its index for statistics
  345. if(unlikely(snprintfz(thread_name, TASK_LEN_MAX, "%s_%d", ACLK_QUERY_THREAD_NAME, i) < 0))
  346. error("snprintf encoding error");
  347. netdata_thread_create(
  348. &query_threads->thread_list[i].thread, thread_name, NETDATA_THREAD_OPTION_JOINABLE, aclk_query_main_thread,
  349. &query_threads->thread_list[i]);
  350. query_threads->thread_list[i].client = client;
  351. }
  352. }
  353. void aclk_query_threads_cleanup(struct aclk_query_threads *query_threads)
  354. {
  355. if (query_threads && query_threads->thread_list) {
  356. for (int i = 0; i < query_threads->count; i++) {
  357. netdata_thread_join(query_threads->thread_list[i].thread, NULL);
  358. }
  359. freez(query_threads->thread_list);
  360. }
  361. aclk_queue_lock();
  362. aclk_queue_flush();
  363. }