aclk_query.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "aclk_query.h"
  3. #include "aclk_stats.h"
  4. #include "aclk_tx_msgs.h"
// Name prefix used when naming the query worker threads ("<prefix>_<index>").
#define ACLK_QUERY_THREAD_NAME "ACLK_Query"
// HTTP request header looked up in the request payload to decide whether the
// response may be gzip-compressed.
#define WEB_HDR_ACCEPT_ENC "Accept-Encoding:"
// Condition variable the query threads block on between queue drains.
pthread_cond_t query_cond_wait = PTHREAD_COND_INITIALIZER;
// Mutex paired with query_cond_wait.
pthread_mutex_t query_lock_wait = PTHREAD_MUTEX_INITIALIZER;
// Shorthands for taking / releasing query_lock_wait.
#define QUERY_THREAD_LOCK pthread_mutex_lock(&query_lock_wait)
#define QUERY_THREAD_UNLOCK pthread_mutex_unlock(&query_lock_wait)
  11. static usec_t aclk_web_api_v1_request(RRDHOST *host, struct web_client *w, char *url)
  12. {
  13. usec_t t;
  14. t = now_monotonic_high_precision_usec();
  15. w->response.code = web_client_api_request_v1(host, w, url);
  16. t = now_monotonic_high_precision_usec() - t;
  17. if (aclk_stats_enabled) {
  18. ACLK_STATS_LOCK;
  19. aclk_metrics_per_sample.cloud_q_process_total += t;
  20. aclk_metrics_per_sample.cloud_q_process_count++;
  21. if (aclk_metrics_per_sample.cloud_q_process_max < t)
  22. aclk_metrics_per_sample.cloud_q_process_max = t;
  23. ACLK_STATS_UNLOCK;
  24. }
  25. return t;
  26. }
  27. static RRDHOST *node_id_2_rrdhost(const char *node_id)
  28. {
  29. int res;
  30. uuid_t node_id_bin, host_id_bin;
  31. rrd_rdlock();
  32. RRDHOST *host = find_host_by_node_id((char *) node_id);
  33. rrd_unlock();
  34. if (host)
  35. return host;
  36. char host_id[UUID_STR_LEN];
  37. if (uuid_parse(node_id, node_id_bin)) {
  38. error("Couldn't parse UUID %s", node_id);
  39. return NULL;
  40. }
  41. if ((res = get_host_id(&node_id_bin, &host_id_bin))) {
  42. error("node not found rc=%d", res);
  43. return NULL;
  44. }
  45. uuid_unparse_lower(host_id_bin, host_id);
  46. return rrdhost_find_by_guid(host_id, 0);
  47. }
  48. #define NODE_ID_QUERY "/node/"
  49. // TODO this function should be quarantied and written nicely
  50. // lots of skeletons from initial ACLK Legacy impl.
  51. // quick and dirty from the start
  52. static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
  53. {
  54. int retval = 0;
  55. usec_t t;
  56. BUFFER *local_buffer = NULL;
  57. BUFFER *log_buffer = buffer_create(NETDATA_WEB_REQUEST_URL_SIZE);
  58. RRDHOST *query_host = localhost;
  59. #ifdef NETDATA_WITH_ZLIB
  60. int z_ret;
  61. BUFFER *z_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
  62. char *start, *end;
  63. #endif
  64. struct web_client *w = (struct web_client *)callocz(1, sizeof(struct web_client));
  65. w->response.data = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
  66. w->response.header = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE);
  67. w->response.header_output = buffer_create(NETDATA_WEB_RESPONSE_HEADER_SIZE);
  68. strcpy(w->origin, "*"); // Simulate web_client_create_on_fd()
  69. w->cookie1[0] = 0; // Simulate web_client_create_on_fd()
  70. w->cookie2[0] = 0; // Simulate web_client_create_on_fd()
  71. w->acl = 0x1f;
  72. buffer_strcat(log_buffer, query->data.http_api_v2.query);
  73. size_t size = 0;
  74. size_t sent = 0;
  75. w->tv_in = query->created_tv;
  76. now_realtime_timeval(&w->tv_ready);
  77. if (query->timeout) {
  78. int in_queue = (int) (dt_usec(&w->tv_in, &w->tv_ready) / 1000);
  79. if (in_queue > query->timeout) {
  80. log_access("QUERY CANCELED: QUEUE TIME EXCEEDED %d ms (LIMIT %d ms)", in_queue, query->timeout);
  81. retval = 1;
  82. w->response.code = HTTP_RESP_BACKEND_FETCH_FAILED;
  83. aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, NULL, 0);
  84. goto cleanup;
  85. }
  86. }
  87. RRDHOST *temp_host = NULL;
  88. if (!strncmp(query->data.http_api_v2.query, NODE_ID_QUERY, strlen(NODE_ID_QUERY))) {
  89. char *node_uuid = query->data.http_api_v2.query + strlen(NODE_ID_QUERY);
  90. char nodeid[UUID_STR_LEN];
  91. if (strlen(node_uuid) < (UUID_STR_LEN - 1)) {
  92. error_report(CLOUD_EMSG_MALFORMED_NODE_ID);
  93. retval = 1;
  94. w->response.code = 404;
  95. aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_MALFORMED_NODE_ID, CLOUD_EMSG_MALFORMED_NODE_ID, NULL, 0);
  96. goto cleanup;
  97. }
  98. strncpyz(nodeid, node_uuid, UUID_STR_LEN - 1);
  99. query_host = node_id_2_rrdhost(nodeid);
  100. if (!query_host) {
  101. temp_host = sql_create_host_by_uuid(nodeid);
  102. if (!temp_host) {
  103. error_report("Host with node_id \"%s\" not found! Returning 404 to Cloud!", nodeid);
  104. retval = 1;
  105. w->response.code = 404;
  106. aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_NODE_NOT_FOUND, CLOUD_EMSG_NODE_NOT_FOUND, NULL, 0);
  107. goto cleanup;
  108. }
  109. }
  110. }
  111. char *mysep = strchr(query->data.http_api_v2.query, '?');
  112. if (mysep) {
  113. url_decode_r(w->decoded_query_string, mysep, NETDATA_WEB_REQUEST_URL_SIZE + 1);
  114. *mysep = '\0';
  115. } else
  116. url_decode_r(w->decoded_query_string, query->data.http_api_v2.query, NETDATA_WEB_REQUEST_URL_SIZE + 1);
  117. mysep = strrchr(query->data.http_api_v2.query, '/');
  118. if (aclk_stats_enabled) {
  119. ACLK_STATS_LOCK;
  120. int stat_idx = aclk_cloud_req_http_type_to_idx(mysep ? mysep + 1 : "other");
  121. aclk_metrics_per_sample.cloud_req_http_by_type[stat_idx]++;
  122. ACLK_STATS_UNLOCK;
  123. }
  124. // execute the query
  125. t = aclk_web_api_v1_request(query_host ? query_host : temp_host, w, mysep ? mysep + 1 : "noop");
  126. free_temporary_host(temp_host);
  127. size = (w->mode == WEB_CLIENT_MODE_FILECOPY) ? w->response.rlen : w->response.data->len;
  128. sent = size;
  129. #ifdef NETDATA_WITH_ZLIB
  130. // check if gzip encoding can and should be used
  131. if ((start = strstr((char *)query->data.http_api_v2.payload, WEB_HDR_ACCEPT_ENC))) {
  132. start += strlen(WEB_HDR_ACCEPT_ENC);
  133. end = strstr(start, "\x0D\x0A");
  134. start = strstr(start, "gzip");
  135. if (start && start < end) {
  136. w->response.zstream.zalloc = Z_NULL;
  137. w->response.zstream.zfree = Z_NULL;
  138. w->response.zstream.opaque = Z_NULL;
  139. if(deflateInit2(&w->response.zstream, web_gzip_level, Z_DEFLATED, 15 + 16, 8, web_gzip_strategy) == Z_OK) {
  140. w->response.zinitialized = 1;
  141. w->response.zoutput = 1;
  142. } else
  143. error("Failed to initialize zlib. Proceeding without compression.");
  144. }
  145. }
  146. if (w->response.data->len && w->response.zinitialized) {
  147. w->response.zstream.next_in = (Bytef *)w->response.data->buffer;
  148. w->response.zstream.avail_in = w->response.data->len;
  149. do {
  150. w->response.zstream.avail_out = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE;
  151. w->response.zstream.next_out = w->response.zbuffer;
  152. z_ret = deflate(&w->response.zstream, Z_FINISH);
  153. if(z_ret < 0) {
  154. if(w->response.zstream.msg)
  155. error("Error compressing body. ZLIB error: \"%s\"", w->response.zstream.msg);
  156. else
  157. error("Unknown error during zlib compression.");
  158. retval = 1;
  159. w->response.code = 500;
  160. aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_ZLIB_ERROR, CLOUD_EMSG_ZLIB_ERROR, NULL, 0);
  161. goto cleanup;
  162. }
  163. int bytes_to_cpy = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE - w->response.zstream.avail_out;
  164. buffer_need_bytes(z_buffer, bytes_to_cpy);
  165. memcpy(&z_buffer->buffer[z_buffer->len], w->response.zbuffer, bytes_to_cpy);
  166. z_buffer->len += bytes_to_cpy;
  167. } while(z_ret != Z_STREAM_END);
  168. // so that web_client_build_http_header
  169. // puts correct content length into header
  170. buffer_free(w->response.data);
  171. w->response.data = z_buffer;
  172. z_buffer = NULL;
  173. }
  174. #endif
  175. w->response.data->date = w->tv_ready.tv_sec;
  176. web_client_build_http_header(w);
  177. local_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE);
  178. local_buffer->contenttype = CT_APPLICATION_JSON;
  179. buffer_strcat(local_buffer, w->response.header_output->buffer);
  180. if (w->response.data->len) {
  181. #ifdef NETDATA_WITH_ZLIB
  182. if (w->response.zinitialized) {
  183. buffer_need_bytes(local_buffer, w->response.data->len);
  184. memcpy(&local_buffer->buffer[local_buffer->len], w->response.data->buffer, w->response.data->len);
  185. local_buffer->len += w->response.data->len;
  186. sent = sent - size + w->response.data->len;
  187. } else {
  188. #endif
  189. buffer_strcat(local_buffer, w->response.data->buffer);
  190. #ifdef NETDATA_WITH_ZLIB
  191. }
  192. #endif
  193. }
  194. // send msg.
  195. aclk_http_msg_v2(query_thr->client, query->callback_topic, query->msg_id, t, query->created, w->response.code, local_buffer->buffer, local_buffer->len);
  196. struct timeval tv;
  197. cleanup:
  198. now_realtime_timeval(&tv);
  199. log_access("%llu: %d '[ACLK]:%d' '%s' (sent/all = %zu/%zu bytes %0.0f%%, prep/sent/total = %0.2f/%0.2f/%0.2f ms) %d '%s'",
  200. w->id
  201. , gettid()
  202. , query_thr->idx
  203. , "DATA"
  204. , sent
  205. , size
  206. , size > sent ? -(((size - sent) / (double)size) * 100.0) : ((size > 0) ? (((sent - size ) / (double)size) * 100.0) : 0.0)
  207. , dt_usec(&w->tv_ready, &w->tv_in) / 1000.0
  208. , dt_usec(&tv, &w->tv_ready) / 1000.0
  209. , dt_usec(&tv, &w->tv_in) / 1000.0
  210. , w->response.code
  211. , strip_control_characters((char *)buffer_tostring(log_buffer))
  212. );
  213. #ifdef NETDATA_WITH_ZLIB
  214. if(w->response.zinitialized)
  215. deflateEnd(&w->response.zstream);
  216. buffer_free(z_buffer);
  217. #endif
  218. buffer_free(w->response.data);
  219. buffer_free(w->response.header);
  220. buffer_free(w->response.header_output);
  221. freez(w);
  222. buffer_free(local_buffer);
  223. buffer_free(log_buffer);
  224. return retval;
  225. }
  226. static int send_bin_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
  227. {
  228. // this will be simplified when legacy support is removed
  229. aclk_send_bin_message_subtopic_pid(query_thr->client, query->data.bin_payload.payload, query->data.bin_payload.size, query->data.bin_payload.topic, query->data.bin_payload.msg_name);
  230. return 0;
  231. }
  232. const char *aclk_query_get_name(aclk_query_type_t qt)
  233. {
  234. switch (qt) {
  235. case HTTP_API_V2: return "http_api_request_v2";
  236. case REGISTER_NODE: return "register_node";
  237. case NODE_STATE_UPDATE: return "node_state_update";
  238. case CHART_DIMS_UPDATE: return "chart_and_dim_update";
  239. case CHART_CONFIG_UPDATED: return "chart_config_updated";
  240. case CHART_RESET: return "reset_chart_messages";
  241. case RETENTION_UPDATED: return "update_retention_info";
  242. case UPDATE_NODE_INFO: return "update_node_info";
  243. case ALARM_LOG_HEALTH: return "alarm_log_health";
  244. case ALARM_PROVIDE_CFG: return "provide_alarm_config";
  245. case ALARM_SNAPSHOT: return "alarm_snapshot";
  246. case UPDATE_NODE_COLLECTORS: return "update_node_collectors";
  247. case PROTO_BIN_MESSAGE: return "generic_binary_proto_message";
  248. default:
  249. error_report("Unknown query type used %d", (int) qt);
  250. return "unknown";
  251. }
  252. }
  253. static void aclk_query_process_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
  254. {
  255. if (query->type == UNKNOWN || query->type >= ACLK_QUERY_TYPE_COUNT) {
  256. error_report("Unknown query in query queue. %u", query->type);
  257. aclk_query_free(query);
  258. return;
  259. }
  260. worker_is_busy(query->type);
  261. if (query->type == HTTP_API_V2) {
  262. debug(D_ACLK, "Processing Queued Message of type: \"http_api_request_v2\"");
  263. http_api_v2(query_thr, query);
  264. } else {
  265. debug(D_ACLK, "Processing Queued Message of type: \"%s\"", query->data.bin_payload.msg_name);
  266. send_bin_msg(query_thr, query);
  267. }
  268. if (aclk_stats_enabled) {
  269. ACLK_STATS_LOCK;
  270. aclk_metrics_per_sample.queries_dispatched++;
  271. aclk_queries_per_thread[query_thr->idx]++;
  272. aclk_metrics_per_sample.queries_per_type[query->type]++;
  273. ACLK_STATS_UNLOCK;
  274. }
  275. aclk_query_free(query);
  276. worker_is_idle();
  277. }
  278. /* Processes messages from queue. Compete for work with other threads
  279. */
  280. int aclk_query_process_msgs(struct aclk_query_thread *query_thr)
  281. {
  282. aclk_query_t query;
  283. while ((query = aclk_queue_pop()))
  284. aclk_query_process_msg(query_thr, query);
  285. return 0;
  286. }
  287. static void worker_aclk_register(void) {
  288. worker_register("ACLKQUERY");
  289. for (int i = 1; i < ACLK_QUERY_TYPE_COUNT; i++) {
  290. worker_register_job_name(i, aclk_query_get_name(i));
  291. }
  292. }
  293. /**
  294. * Main query processing thread
  295. */
  296. void *aclk_query_main_thread(void *ptr)
  297. {
  298. worker_aclk_register();
  299. struct aclk_query_thread *query_thr = ptr;
  300. while (!netdata_exit) {
  301. aclk_query_process_msgs(query_thr);
  302. worker_is_idle();
  303. QUERY_THREAD_LOCK;
  304. if (unlikely(pthread_cond_wait(&query_cond_wait, &query_lock_wait)))
  305. sleep_usec(USEC_PER_SEC * 1);
  306. QUERY_THREAD_UNLOCK;
  307. }
  308. worker_unregister();
  309. return NULL;
  310. }
  311. #define TASK_LEN_MAX 22
  312. void aclk_query_threads_start(struct aclk_query_threads *query_threads, mqtt_wss_client client)
  313. {
  314. info("Starting %d query threads.", query_threads->count);
  315. char thread_name[TASK_LEN_MAX];
  316. query_threads->thread_list = callocz(query_threads->count, sizeof(struct aclk_query_thread));
  317. for (int i = 0; i < query_threads->count; i++) {
  318. query_threads->thread_list[i].idx = i; //thread needs to know its index for statistics
  319. if(unlikely(snprintfz(thread_name, TASK_LEN_MAX, "%s_%d", ACLK_QUERY_THREAD_NAME, i) < 0))
  320. error("snprintf encoding error");
  321. netdata_thread_create(
  322. &query_threads->thread_list[i].thread, thread_name, NETDATA_THREAD_OPTION_JOINABLE, aclk_query_main_thread,
  323. &query_threads->thread_list[i]);
  324. query_threads->thread_list[i].client = client;
  325. }
  326. }
  327. void aclk_query_threads_cleanup(struct aclk_query_threads *query_threads)
  328. {
  329. if (query_threads && query_threads->thread_list) {
  330. for (int i = 0; i < query_threads->count; i++) {
  331. netdata_thread_join(query_threads->thread_list[i].thread, NULL);
  332. }
  333. freez(query_threads->thread_list);
  334. }
  335. aclk_queue_lock();
  336. aclk_queue_flush();
  337. }