aclk_query.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "aclk_query.h"
  3. #include "aclk_stats.h"
  4. #include "aclk_tx_msgs.h"
  5. #include "../../web/server/web_client_cache.h"
// Header name looked up in the forwarded HTTP request payload to decide
// whether the reply may be gzip-compressed (see http_api_v2()).
#define WEB_HDR_ACCEPT_ENC "Accept-Encoding:"
// Largest response body (30 MiB) http_api_v2() will forward; bigger
// responses are replaced with a short error text.
#define ACLK_MAX_WEB_RESPONSE_SIZE (30 * 1024 * 1024)
// Condition variable the query threads block on between queue drains
// (see aclk_query_main_thread()); broadcast by aclk_query_request_cancel().
pthread_cond_t query_cond_wait = PTHREAD_COND_INITIALIZER;
// Mutex paired with query_cond_wait.
pthread_mutex_t query_lock_wait = PTHREAD_MUTEX_INITIALIZER;
// Convenience wrappers for taking/releasing query_lock_wait.
#define QUERY_THREAD_LOCK pthread_mutex_lock(&query_lock_wait)
#define QUERY_THREAD_UNLOCK pthread_mutex_unlock(&query_lock_wait)
// One node of the singly linked list of HTTP queries currently in flight.
struct pending_req_list {
    // Message id of the query. The pointer is stored as-is (not copied) by
    // pending_req_list_add() and is never freed by the list code.
    const char *msg_id;
    // simple_hash() of msg_id; compared first to skip most strcmp() calls.
    uint32_t hash;
    // Set to 1 by mark_pending_req_cancelled(); polled through
    // aclk_web_client_interrupt_cb().
    int canceled;
    // Next node, or NULL at the tail.
    struct pending_req_list *next;
};
// Head of the in-flight request list; new entries are pushed at the front.
static struct pending_req_list *pending_req_list_head = NULL;
// Serialises every traversal and modification of pending_req_list_head.
static pthread_mutex_t pending_req_list_lock = PTHREAD_MUTEX_INITIALIZER;
  20. static struct pending_req_list *pending_req_list_add(const char *msg_id)
  21. {
  22. struct pending_req_list *new = callocz(1, sizeof(struct pending_req_list));
  23. new->msg_id = msg_id;
  24. new->hash = simple_hash(msg_id);
  25. pthread_mutex_lock(&pending_req_list_lock);
  26. new->next = pending_req_list_head;
  27. pending_req_list_head = new;
  28. pthread_mutex_unlock(&pending_req_list_lock);
  29. return new;
  30. }
  31. void pending_req_list_rm(const char *msg_id)
  32. {
  33. uint32_t hash = simple_hash(msg_id);
  34. struct pending_req_list *prev = NULL;
  35. pthread_mutex_lock(&pending_req_list_lock);
  36. struct pending_req_list *curr = pending_req_list_head;
  37. while (curr) {
  38. if (curr->hash == hash && strcmp(curr->msg_id, msg_id) == 0) {
  39. if (prev)
  40. prev->next = curr->next;
  41. else
  42. pending_req_list_head = curr->next;
  43. freez(curr);
  44. break;
  45. }
  46. prev = curr;
  47. curr = curr->next;
  48. }
  49. pthread_mutex_unlock(&pending_req_list_lock);
  50. }
  51. int mark_pending_req_cancelled(const char *msg_id)
  52. {
  53. uint32_t hash = simple_hash(msg_id);
  54. pthread_mutex_lock(&pending_req_list_lock);
  55. struct pending_req_list *curr = pending_req_list_head;
  56. while (curr) {
  57. if (curr->hash == hash && strcmp(curr->msg_id, msg_id) == 0) {
  58. curr->canceled = 1;
  59. pthread_mutex_unlock(&pending_req_list_lock);
  60. return 0;
  61. }
  62. curr = curr->next;
  63. }
  64. pthread_mutex_unlock(&pending_req_list_lock);
  65. return 1;
  66. }
  67. static bool aclk_web_client_interrupt_cb(struct web_client *w __maybe_unused, void *data)
  68. {
  69. struct pending_req_list *req = (struct pending_req_list *)data;
  70. return req->canceled;
  71. }
  72. static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query) {
  73. int retval = 0;
  74. BUFFER *local_buffer = NULL;
  75. size_t size = 0;
  76. size_t sent = 0;
  77. #ifdef NETDATA_WITH_ZLIB
  78. int z_ret;
  79. BUFFER *z_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE, &netdata_buffers_statistics.buffers_aclk);
  80. char *start, *end;
  81. #endif
  82. struct web_client *w = web_client_get_from_cache();
  83. w->acl = WEB_CLIENT_ACL_ACLK;
  84. w->mode = WEB_CLIENT_MODE_GET;
  85. w->timings.tv_in = query->created_tv;
  86. w->interrupt.callback = aclk_web_client_interrupt_cb;
  87. w->interrupt.callback_data = pending_req_list_add(query->msg_id);
  88. usec_t t;
  89. web_client_timeout_checkpoint_set(w, query->timeout);
  90. if(web_client_timeout_checkpoint_and_check(w, &t)) {
  91. log_access("QUERY CANCELED: QUEUE TIME EXCEEDED %llu ms (LIMIT %d ms)", t / USEC_PER_MS, query->timeout);
  92. retval = 1;
  93. w->response.code = HTTP_RESP_BACKEND_FETCH_FAILED;
  94. aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, NULL, 0);
  95. goto cleanup;
  96. }
  97. web_client_decode_path_and_query_string(w, query->data.http_api_v2.query);
  98. char *path = (char *)buffer_tostring(w->url_path_decoded);
  99. if (aclk_stats_enabled) {
  100. char *url_path_endpoint = strrchr(path, '/');
  101. ACLK_STATS_LOCK;
  102. int stat_idx = aclk_cloud_req_http_type_to_idx(url_path_endpoint ? url_path_endpoint + 1 : "other");
  103. aclk_metrics_per_sample.cloud_req_http_by_type[stat_idx]++;
  104. ACLK_STATS_UNLOCK;
  105. }
  106. w->response.code = web_client_api_request_with_node_selection(localhost, w, path);
  107. web_client_timeout_checkpoint_response_ready(w, &t);
  108. if(buffer_strlen(w->response.data) > ACLK_MAX_WEB_RESPONSE_SIZE) {
  109. buffer_flush(w->response.data);
  110. buffer_strcat(w->response.data, "response is too big");
  111. w->response.data->content_type = CT_TEXT_PLAIN;
  112. w->response.code = HTTP_RESP_CONTENT_TOO_LONG;
  113. }
  114. if (aclk_stats_enabled) {
  115. ACLK_STATS_LOCK;
  116. aclk_metrics_per_sample.cloud_q_process_total += t;
  117. aclk_metrics_per_sample.cloud_q_process_count++;
  118. if (aclk_metrics_per_sample.cloud_q_process_max < t)
  119. aclk_metrics_per_sample.cloud_q_process_max = t;
  120. ACLK_STATS_UNLOCK;
  121. }
  122. size = w->response.data->len;
  123. sent = size;
  124. #ifdef NETDATA_WITH_ZLIB
  125. // check if gzip encoding can and should be used
  126. if ((start = strstr((char *)query->data.http_api_v2.payload, WEB_HDR_ACCEPT_ENC))) {
  127. start += strlen(WEB_HDR_ACCEPT_ENC);
  128. end = strstr(start, "\x0D\x0A");
  129. start = strstr(start, "gzip");
  130. if (start && start < end) {
  131. w->response.zstream.zalloc = Z_NULL;
  132. w->response.zstream.zfree = Z_NULL;
  133. w->response.zstream.opaque = Z_NULL;
  134. if(deflateInit2(&w->response.zstream, web_gzip_level, Z_DEFLATED, 15 + 16, 8, web_gzip_strategy) == Z_OK) {
  135. w->response.zinitialized = true;
  136. w->response.zoutput = true;
  137. } else
  138. error("Failed to initialize zlib. Proceeding without compression.");
  139. }
  140. }
  141. if (w->response.data->len && w->response.zinitialized) {
  142. w->response.zstream.next_in = (Bytef *)w->response.data->buffer;
  143. w->response.zstream.avail_in = w->response.data->len;
  144. do {
  145. w->response.zstream.avail_out = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE;
  146. w->response.zstream.next_out = w->response.zbuffer;
  147. z_ret = deflate(&w->response.zstream, Z_FINISH);
  148. if(z_ret < 0) {
  149. if(w->response.zstream.msg)
  150. error("Error compressing body. ZLIB error: \"%s\"", w->response.zstream.msg);
  151. else
  152. error("Unknown error during zlib compression.");
  153. retval = 1;
  154. w->response.code = 500;
  155. aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_ZLIB_ERROR, CLOUD_EMSG_ZLIB_ERROR, NULL, 0);
  156. goto cleanup;
  157. }
  158. int bytes_to_cpy = NETDATA_WEB_RESPONSE_ZLIB_CHUNK_SIZE - w->response.zstream.avail_out;
  159. buffer_need_bytes(z_buffer, bytes_to_cpy);
  160. memcpy(&z_buffer->buffer[z_buffer->len], w->response.zbuffer, bytes_to_cpy);
  161. z_buffer->len += bytes_to_cpy;
  162. } while(z_ret != Z_STREAM_END);
  163. // so that web_client_build_http_header
  164. // puts correct content length into header
  165. buffer_free(w->response.data);
  166. w->response.data = z_buffer;
  167. z_buffer = NULL;
  168. }
  169. #endif
  170. w->response.data->date = w->timings.tv_ready.tv_sec;
  171. web_client_build_http_header(w);
  172. local_buffer = buffer_create(NETDATA_WEB_RESPONSE_INITIAL_SIZE, &netdata_buffers_statistics.buffers_aclk);
  173. local_buffer->content_type = CT_APPLICATION_JSON;
  174. buffer_strcat(local_buffer, w->response.header_output->buffer);
  175. if (w->response.data->len) {
  176. #ifdef NETDATA_WITH_ZLIB
  177. if (w->response.zinitialized) {
  178. buffer_need_bytes(local_buffer, w->response.data->len);
  179. memcpy(&local_buffer->buffer[local_buffer->len], w->response.data->buffer, w->response.data->len);
  180. local_buffer->len += w->response.data->len;
  181. sent = sent - size + w->response.data->len;
  182. } else {
  183. #endif
  184. buffer_strcat(local_buffer, w->response.data->buffer);
  185. #ifdef NETDATA_WITH_ZLIB
  186. }
  187. #endif
  188. }
  189. // send msg.
  190. aclk_http_msg_v2(query_thr->client, query->callback_topic, query->msg_id, t, query->created, w->response.code, local_buffer->buffer, local_buffer->len);
  191. struct timeval tv;
  192. cleanup:
  193. now_monotonic_high_precision_timeval(&tv);
  194. log_access("%llu: %d '[ACLK]:%d' '%s' (sent/all = %zu/%zu bytes %0.0f%%, prep/sent/total = %0.2f/%0.2f/%0.2f ms) %d '%s'",
  195. w->id
  196. , gettid()
  197. , query_thr->idx
  198. , "DATA"
  199. , sent
  200. , size
  201. , size > sent ? -(((size - sent) / (double)size) * 100.0) : ((size > 0) ? (((sent - size ) / (double)size) * 100.0) : 0.0)
  202. , dt_usec(&w->timings.tv_ready, &w->timings.tv_in) / 1000.0
  203. , dt_usec(&tv, &w->timings.tv_ready) / 1000.0
  204. , dt_usec(&tv, &w->timings.tv_in) / 1000.0
  205. , w->response.code
  206. , strip_control_characters((char *)buffer_tostring(w->url_as_received))
  207. );
  208. web_client_release_to_cache(w);
  209. pending_req_list_rm(query->msg_id);
  210. #ifdef NETDATA_WITH_ZLIB
  211. buffer_free(z_buffer);
  212. #endif
  213. buffer_free(local_buffer);
  214. return retval;
  215. }
  216. static int send_bin_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
  217. {
  218. // this will be simplified when legacy support is removed
  219. aclk_send_bin_message_subtopic_pid(query_thr->client, query->data.bin_payload.payload, query->data.bin_payload.size, query->data.bin_payload.topic, query->data.bin_payload.msg_name);
  220. return 0;
  221. }
  222. const char *aclk_query_get_name(aclk_query_type_t qt, int unknown_ok)
  223. {
  224. switch (qt) {
  225. case HTTP_API_V2: return "http_api_request_v2";
  226. case REGISTER_NODE: return "register_node";
  227. case NODE_STATE_UPDATE: return "node_state_update";
  228. case CHART_DIMS_UPDATE: return "chart_and_dim_update";
  229. case CHART_CONFIG_UPDATED: return "chart_config_updated";
  230. case CHART_RESET: return "reset_chart_messages";
  231. case RETENTION_UPDATED: return "update_retention_info";
  232. case UPDATE_NODE_INFO: return "update_node_info";
  233. case ALARM_PROVIDE_CHECKPOINT: return "alarm_checkpoint";
  234. case ALARM_PROVIDE_CFG: return "provide_alarm_config";
  235. case ALARM_SNAPSHOT: return "alarm_snapshot";
  236. case UPDATE_NODE_COLLECTORS: return "update_node_collectors";
  237. case PROTO_BIN_MESSAGE: return "generic_binary_proto_message";
  238. default:
  239. if (!unknown_ok)
  240. error_report("Unknown query type used %d", (int) qt);
  241. return "unknown";
  242. }
  243. }
  244. static void aclk_query_process_msg(struct aclk_query_thread *query_thr, aclk_query_t query)
  245. {
  246. if (query->type == UNKNOWN || query->type >= ACLK_QUERY_TYPE_COUNT) {
  247. error_report("Unknown query in query queue. %u", query->type);
  248. aclk_query_free(query);
  249. return;
  250. }
  251. worker_is_busy(query->type);
  252. if (query->type == HTTP_API_V2) {
  253. debug(D_ACLK, "Processing Queued Message of type: \"http_api_request_v2\"");
  254. http_api_v2(query_thr, query);
  255. } else {
  256. debug(D_ACLK, "Processing Queued Message of type: \"%s\"", query->data.bin_payload.msg_name);
  257. send_bin_msg(query_thr, query);
  258. }
  259. if (aclk_stats_enabled) {
  260. ACLK_STATS_LOCK;
  261. aclk_metrics_per_sample.queries_dispatched++;
  262. aclk_queries_per_thread[query_thr->idx]++;
  263. aclk_metrics_per_sample.queries_per_type[query->type]++;
  264. ACLK_STATS_UNLOCK;
  265. }
  266. aclk_query_free(query);
  267. worker_is_idle();
  268. }
  269. /* Processes messages from queue. Compete for work with other threads
  270. */
  271. int aclk_query_process_msgs(struct aclk_query_thread *query_thr)
  272. {
  273. aclk_query_t query;
  274. while ((query = aclk_queue_pop()))
  275. aclk_query_process_msg(query_thr, query);
  276. return 0;
  277. }
  278. static void worker_aclk_register(void) {
  279. worker_register("ACLKQUERY");
  280. for (int i = 1; i < ACLK_QUERY_TYPE_COUNT; i++) {
  281. worker_register_job_name(i, aclk_query_get_name(i, 0));
  282. }
  283. }
  284. static void aclk_query_request_cancel(void *data)
  285. {
  286. pthread_cond_broadcast((pthread_cond_t *) data);
  287. }
  288. /**
  289. * Main query processing thread
  290. */
  291. void *aclk_query_main_thread(void *ptr)
  292. {
  293. worker_aclk_register();
  294. struct aclk_query_thread *query_thr = ptr;
  295. service_register(SERVICE_THREAD_TYPE_NETDATA, aclk_query_request_cancel, NULL, &query_cond_wait, false);
  296. while (service_running(SERVICE_ACLK | ABILITY_DATA_QUERIES)) {
  297. aclk_query_process_msgs(query_thr);
  298. worker_is_idle();
  299. QUERY_THREAD_LOCK;
  300. if (unlikely(pthread_cond_wait(&query_cond_wait, &query_lock_wait)))
  301. sleep_usec(USEC_PER_SEC * 1);
  302. QUERY_THREAD_UNLOCK;
  303. }
  304. worker_unregister();
  305. return NULL;
  306. }
  307. #define TASK_LEN_MAX 22
  308. void aclk_query_threads_start(struct aclk_query_threads *query_threads, mqtt_wss_client client)
  309. {
  310. info("Starting %d query threads.", query_threads->count);
  311. char thread_name[TASK_LEN_MAX];
  312. query_threads->thread_list = callocz(query_threads->count, sizeof(struct aclk_query_thread));
  313. for (int i = 0; i < query_threads->count; i++) {
  314. query_threads->thread_list[i].idx = i; //thread needs to know its index for statistics
  315. query_threads->thread_list[i].client = client;
  316. if(unlikely(snprintfz(thread_name, TASK_LEN_MAX, "ACLK_QRY[%d]", i) < 0))
  317. error("snprintf encoding error");
  318. netdata_thread_create(
  319. &query_threads->thread_list[i].thread, thread_name, NETDATA_THREAD_OPTION_JOINABLE, aclk_query_main_thread,
  320. &query_threads->thread_list[i]);
  321. }
  322. }
  323. void aclk_query_threads_cleanup(struct aclk_query_threads *query_threads)
  324. {
  325. if (query_threads && query_threads->thread_list) {
  326. for (int i = 0; i < query_threads->count; i++) {
  327. netdata_thread_join(query_threads->thread_list[i].thread, NULL);
  328. }
  329. freez(query_threads->thread_list);
  330. }
  331. aclk_queue_lock();
  332. aclk_queue_flush();
  333. }