parser.c 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. /** @file parser.c
  3. * @brief API to parse and search logs
  4. */
  5. #if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__) && !defined(__FreeBSD__)
  6. /* _XOPEN_SOURCE 700 required by strptime (POSIX 2004) and strndup (POSIX 2008)
  7. * Will need to find a cleaner way of doing this, as currently defining
  8. * _XOPEN_SOURCE 700 can cause issues on Centos 7, MacOS and FreeBSD too. */
  9. #define _XOPEN_SOURCE 700
  10. /* _BSD_SOURCE (glibc <= 2.19) and _DEFAULT_SOURCE (glibc >= 2.20) are required
  11. * to silence "warning: implicit declaration of function ‘strsep’;" that is
  12. * included through libnetdata/inlined.h. */
  13. #define _BSD_SOURCE
  14. #define _DEFAULT_SOURCE
  15. #include <time.h>
  16. #endif
  17. #include "parser.h"
  18. #include "helper.h"
  19. #include <stdio.h>
  20. #include <sys/resource.h>
  21. #include <math.h>
  22. #include <string.h>
  23. static regex_t vhost_regex, req_client_regex, cipher_suite_regex;
  24. const char* const csv_auto_format_guess_matrix[] = {
  25. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time", // csvVhostCustom4
  26. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time", // csvVhostCustom3
  27. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - -", // csvVhostCombined
  28. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time $upstream_response_time", // csvVhostCustom2
  29. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time", // csvVhostCustom1
  30. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", // csvVhostCommon
  31. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time", // csvCustom4
  32. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time", // csvCustom3
  33. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - -", // csvCombined
  34. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time $upstream_response_time", // csvCustom2
  35. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time", // csvCustom1
  36. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", // csvCommon
  37. NULL}
  38. ;
  39. UNIT_STATIC int count_fields(const char *line, const char delimiter){
  40. const char *ptr;
  41. int cnt, fQuote;
  42. for (cnt = 1, fQuote = 0, ptr = line; *ptr != '\n' && *ptr != '\r' && *ptr != '\0'; ptr++ ){
  43. if (fQuote) {
  44. if (*ptr == '\"') {
  45. if ( ptr[1] == '\"' ) {
  46. ptr++;
  47. continue;
  48. }
  49. fQuote = 0;
  50. }
  51. continue;
  52. }
  53. if(*ptr == '\"'){
  54. fQuote = 1;
  55. continue;
  56. }
  57. if(*ptr == delimiter){
  58. cnt++;
  59. while(*(ptr+1) == delimiter){ ptr++;};
  60. continue;
  61. }
  62. }
  63. if (fQuote) {
  64. return -1;
  65. }
  66. return cnt;
  67. }
  68. /**
  69. * @brief Parse a delimited string into an array of strings.
  70. * @details Given a string containing no linebreaks, or containing line breaks
  71. * which are escaped by "double quotes", extract a NULL-terminated
  72. * array of strings, one for every delimiter-separated value in the row.
  73. * @param[in] line The input string to be parsed.
  74. * @param[in] delimiter The delimiter to be used to split the string.
  75. * @param[in] num_fields The expected number of fields in \p line. If a negative
  76. * number is provided, they will be counted.
  77. * @return A NULL-terminated array of strings with the delimited values in \p line,
  78. * or NULL in any other case.
  79. * @todo This function has not been benchmarked or optimised.
  80. */
  81. static inline char **parse_csv( const char *line, const char delimiter, int num_fields) {
  82. char **buf, **bptr, *tmp, *tptr;
  83. const char *ptr;
  84. int fQuote, fEnd;
  85. if(num_fields < 0){
  86. num_fields = count_fields(line, delimiter);
  87. if ( num_fields == -1 ) {
  88. return NULL;
  89. }
  90. }
  91. buf = mallocz( sizeof(char*) * (num_fields+1) );
  92. tmp = mallocz( strlen(line) + 1 );
  93. bptr = buf;
  94. for ( ptr = line, fQuote = 0, *tmp = '\0', tptr = tmp, fEnd = 0; ; ptr++ ) {
  95. if ( fQuote ) {
  96. if ( !*ptr ) {
  97. break;
  98. }
  99. if ( *ptr == '\"' ) {
  100. if ( ptr[1] == '\"' ) {
  101. *tptr++ = '\"';
  102. ptr++;
  103. continue;
  104. }
  105. fQuote = 0;
  106. }
  107. else {
  108. *tptr++ = *ptr;
  109. }
  110. continue;
  111. }
  112. if(*ptr == '\"'){
  113. fQuote = 1;
  114. continue;
  115. }
  116. else if(*ptr == '\0'){
  117. fEnd = 1;
  118. *tptr = '\0';
  119. *bptr = strdupz( tmp );
  120. if ( !*bptr ) {
  121. for ( bptr--; bptr >= buf; bptr-- ) {
  122. freez( *bptr );
  123. }
  124. freez( buf );
  125. freez( tmp );
  126. return NULL;
  127. }
  128. bptr++;
  129. tptr = tmp;
  130. break;
  131. }
  132. else if(*ptr == delimiter){
  133. *tptr = '\0';
  134. *bptr = strdupz( tmp );
  135. if ( !*bptr ) {
  136. for ( bptr--; bptr >= buf; bptr-- ) {
  137. freez( *bptr );
  138. }
  139. freez( buf );
  140. freez( tmp );
  141. return NULL;
  142. }
  143. bptr++;
  144. tptr = tmp;
  145. continue;
  146. }
  147. else{
  148. *tptr++ = *ptr;
  149. continue;
  150. }
  151. if ( fEnd ) {
  152. break;
  153. }
  154. }
  155. *bptr = NULL;
  156. freez( tmp );
  157. return buf;
  158. }
  159. /**
  160. * @brief Search a buffer for a keyword (or regular expression)
  161. * @details Search the source buffer for a keyword (or regular expression) and
  162. * copy matches to the destination buffer.
  163. * @param[in] src The source buffer to be searched
  164. * @param[in] src_sz Size of \p src
  165. * @param[in, out] dest The destination buffer where the results will be
  166. * written out to. If NULL, the results will just be discarded.
  167. * @param[out] dest_sz Size of \p dest
  168. * @param[in] keyword The keyword or pattern to be searched in the src buffer
  169. * @param[in] regex The precompiled regular expression to be search in the
  170. * src buffer. If NULL, \p keyword will be used instead.
  171. * @param[in] ignore_case Perform case insensitive search if 1.
  172. * @return Number of matches, or -1 in case of error
  173. */
  174. int search_keyword( char *src, size_t src_sz __maybe_unused,
  175. char *dest, size_t *dest_sz,
  176. const char *keyword, regex_t *regex,
  177. const int ignore_case){
  178. m_assert(src[src_sz - 1] == '\0', "src[src_sz - 1] should be '\0' but it's not");
  179. m_assert((dest && dest_sz) || (!dest && !dest_sz), "either both dest and dest_sz exist, or none does");
  180. if(unlikely(dest && !dest_sz))
  181. return -1;
  182. regex_t regex_compiled;
  183. if(regex)
  184. regex_compiled = *regex;
  185. else{
  186. char regexString[MAX_REGEX_SIZE];
  187. const int regex_flags = ignore_case ? REG_EXTENDED | REG_NEWLINE | REG_ICASE : REG_EXTENDED | REG_NEWLINE;
  188. snprintf(regexString, MAX_REGEX_SIZE, ".*(%s).*", keyword);
  189. int rc;
  190. if (unlikely((rc = regcomp(&regex_compiled, regexString, regex_flags)))){
  191. size_t regcomp_err_str_size = regerror(rc, &regex_compiled, 0, 0);
  192. char *regcomp_err_str = mallocz(regcomp_err_str_size);
  193. regerror(rc, &regex_compiled, regcomp_err_str, regcomp_err_str_size);
  194. freez(regcomp_err_str);
  195. fatal("Could not compile regular expression:%.*s, error: %s", (int) MAX_REGEX_SIZE, regexString, regcomp_err_str);
  196. };
  197. }
  198. regmatch_t groupArray[1];
  199. int matches = 0;
  200. char *cursor = src;
  201. if(dest_sz)
  202. *dest_sz = 0;
  203. for ( ; ; matches++){
  204. if (regexec(&regex_compiled, cursor, 1, groupArray, REG_NOTBOL | REG_NOTEOL))
  205. break; // No more matches
  206. if (groupArray[0].rm_so == -1)
  207. break; // No more groups
  208. size_t match_len = (size_t) (groupArray[0].rm_eo - groupArray[0].rm_so);
  209. // debug_log( "Match %d [%2d-%2d]:%.*s\n", matches, groupArray[0].rm_so,
  210. // groupArray[0].rm_eo, (int) match_len, cursor + groupArray[0].rm_so);
  211. if(dest && dest_sz){
  212. memcpy( &dest[*dest_sz], cursor + groupArray[0].rm_so, match_len);
  213. *dest_sz += match_len + 1;
  214. dest[*dest_sz - 1] = '\n';
  215. }
  216. cursor += groupArray[0].rm_eo;
  217. }
  218. if(!regex)
  219. regfree(&regex_compiled);
  220. return matches;
  221. }
  222. /**
  223. * @brief Extract web log parser configuration from string
  224. * @param[in] log_format String that describes the log format
  225. * @param[in] delimiter Delimiter to be used when parsing a CSV log format
  226. * @return Pointer to struct that contains the extracted log format
  227. * configuration or NULL if no fields found in log_format.
  228. */
  229. Web_log_parser_config_t *read_web_log_parser_config(const char *log_format, const char delimiter){
  230. int num_fields = count_fields(log_format, delimiter);
  231. if(num_fields <= 0) return NULL;
  232. /* If first execution of this function, initialise regexs */
  233. static int regexs_initialised = 0;
  234. // TODO: Tests needed for following regexs.
  235. if(!regexs_initialised){
  236. assert(regcomp(&vhost_regex, "^[a-zA-Z0-9:.-]+$", REG_NOSUB | REG_EXTENDED) == 0);
  237. assert(regcomp(&req_client_regex, "^([0-9a-f:.]+|localhost)$", REG_NOSUB | REG_EXTENDED) == 0);
  238. assert(regcomp(&cipher_suite_regex, "^[A-Z0-9_-]+$", REG_NOSUB | REG_EXTENDED) == 0);
  239. regexs_initialised = 1;
  240. }
  241. Web_log_parser_config_t *wblp_config = callocz(1, sizeof(Web_log_parser_config_t));
  242. wblp_config->num_fields = num_fields;
  243. wblp_config->delimiter = delimiter;
  244. char **parsed_format = parse_csv(log_format, delimiter, num_fields); // parsed_format is NULL-terminated
  245. wblp_config->fields = callocz(num_fields, sizeof(web_log_line_field_t));
  246. unsigned int fields_off = 0;
  247. for(int i = 0; i < num_fields; i++ ){
  248. if(strcmp(parsed_format[i], "$host:$server_port") == 0 ||
  249. strcmp(parsed_format[i], "%v:%p") == 0) {
  250. wblp_config->fields[fields_off++] = VHOST_WITH_PORT;
  251. continue;
  252. }
  253. if(strcmp(parsed_format[i], "$host") == 0 ||
  254. strcmp(parsed_format[i], "$http_host") == 0 ||
  255. strcmp(parsed_format[i], "%v") == 0) {
  256. wblp_config->fields[fields_off++] = VHOST;
  257. continue;
  258. }
  259. if(strcmp(parsed_format[i], "$server_port") == 0 ||
  260. strcmp(parsed_format[i], "%p") == 0) {
  261. wblp_config->fields[fields_off++] = PORT;
  262. continue;
  263. }
  264. if(strcmp(parsed_format[i], "$scheme") == 0) {
  265. wblp_config->fields[fields_off++] = REQ_SCHEME;
  266. continue;
  267. }
  268. if(strcmp(parsed_format[i], "$remote_addr") == 0 ||
  269. strcmp(parsed_format[i], "%a") == 0 ||
  270. strcmp(parsed_format[i], "%h") == 0) {
  271. wblp_config->fields[fields_off++] = REQ_CLIENT;
  272. continue;
  273. }
  274. if(strcmp(parsed_format[i], "$request") == 0 ||
  275. strcmp(parsed_format[i], "%r") == 0) {
  276. wblp_config->fields[fields_off++] = REQ;
  277. continue;
  278. }
  279. if(strcmp(parsed_format[i], "$request_method") == 0 ||
  280. strcmp(parsed_format[i], "%m") == 0) {
  281. wblp_config->fields[fields_off++] = REQ_METHOD;
  282. continue;
  283. }
  284. if(strcmp(parsed_format[i], "$request_uri") == 0 ||
  285. strcmp(parsed_format[i], "%U") == 0) {
  286. wblp_config->fields[fields_off++] = REQ_URL;
  287. continue;
  288. }
  289. if(strcmp(parsed_format[i], "$server_protocol") == 0 ||
  290. strcmp(parsed_format[i], "%H") == 0) {
  291. wblp_config->fields[fields_off++] = REQ_PROTO;
  292. continue;
  293. }
  294. if(strcmp(parsed_format[i], "$request_length") == 0 ||
  295. strcmp(parsed_format[i], "%I") == 0) {
  296. wblp_config->fields[fields_off++] = REQ_SIZE;
  297. continue;
  298. }
  299. if(strcmp(parsed_format[i], "$request_time") == 0 ||
  300. strcmp(parsed_format[i], "%D") == 0) {
  301. wblp_config->fields[fields_off++] = REQ_PROC_TIME;
  302. continue;
  303. }
  304. if(strcmp(parsed_format[i], "$status") == 0 ||
  305. strcmp(parsed_format[i], "%>s") == 0 ||
  306. strcmp(parsed_format[i], "%s") == 0) {
  307. wblp_config->fields[fields_off++] = RESP_CODE;
  308. continue;
  309. }
  310. if(strcmp(parsed_format[i], "$bytes_sent") == 0 ||
  311. strcmp(parsed_format[i], "$body_bytes_sent") == 0 ||
  312. strcmp(parsed_format[i], "%b") == 0 ||
  313. strcmp(parsed_format[i], "%O") == 0 ||
  314. strcmp(parsed_format[i], "%B") == 0) {
  315. wblp_config->fields[fields_off++] = RESP_SIZE;
  316. continue;
  317. }
  318. if(strcmp(parsed_format[i], "$upstream_response_time") == 0) {
  319. wblp_config->fields[fields_off++] = UPS_RESP_TIME;
  320. continue;
  321. }
  322. if(strcmp(parsed_format[i], "$ssl_protocol") == 0) {
  323. wblp_config->fields[fields_off++] = SSL_PROTO;
  324. continue;
  325. }
  326. if(strcmp(parsed_format[i], "$ssl_cipher") == 0) {
  327. wblp_config->fields[fields_off++] = SSL_CIPHER_SUITE;
  328. continue;
  329. }
  330. if(strcmp(parsed_format[i], "$time_local") == 0 || strcmp(parsed_format[i], "[$time_local]") == 0 ||
  331. strcmp(parsed_format[i], "%t") == 0 || strcmp(parsed_format[i], "[%t]") == 0) {
  332. wblp_config->fields = reallocz(wblp_config->fields, (num_fields + 1) * sizeof(web_log_line_field_t));
  333. wblp_config->fields[fields_off++] = TIME;
  334. wblp_config->fields[fields_off++] = TIME; // TIME takes 2 fields
  335. wblp_config->num_fields++; // TIME takes 2 fields
  336. continue;
  337. }
  338. wblp_config->fields[fields_off++] = CUSTOM;
  339. }
  340. for(int i = 0; parsed_format[i] != NULL; i++)
  341. freez(parsed_format[i]);
  342. freez(parsed_format);
  343. return wblp_config;
  344. }
/**
 * @brief Parse a web log line to extract individual fields.
 * @param[in] wblp_config Configuration that specifies how to parse the line.
 * @param[in] line Web log record to be parsed. '\n', '\r' or '\0' terminated.
 * @param[in] line_len Length of \p line in bytes. Trailing '\n', '\r' and ' '
 * characters are excluded from it before the fields are extracted.
 * @param[out] log_line_parsed Struct that stores the results of parsing.
 */
  351. void parse_web_log_line(const Web_log_parser_config_t *wblp_config,
  352. char *line, size_t line_len,
  353. Log_line_parsed_t *log_line_parsed){
  354. /* Read parsing configuration */
  355. web_log_line_field_t *fields_format = wblp_config->fields;
  356. const int num_fields_config = wblp_config->num_fields;
  357. const char delimiter = wblp_config->delimiter;
  358. const int verify = wblp_config->verify_parsed_logs;
  359. /* Consume new lines and spaces at end of line */
  360. for(; line[line_len-1] == '\n' || line[line_len-1] == '\r' || line[line_len-1] == ' '; line_len--);
  361. char *field = line;
  362. char *offset = line;
  363. size_t field_size = 0;
  364. for(int i = 0; i < num_fields_config; i++ ){
  365. /* Consume double quotes and extra delimiters at beginning of field */
  366. while(*field == '"' || *field == delimiter) field++, offset++;
  367. /* Find offset boundaries of next field in line */
  368. while(((size_t)(offset - line) < line_len) && *offset != delimiter) offset++;
  369. if(unlikely(*(offset - 1) == '"')) offset--;
  370. field_size = (size_t) (offset - field);
  371. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  372. debug_log( "Field[%d]:%.*s", i, (int)field_size, field);
  373. #endif
  374. if(fields_format[i] == CUSTOM){
  375. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  376. debug_log( "Item %d (type: CUSTOM or UNKNOWN):%.*s", i, (int)field_size, field);
  377. #endif
  378. goto next_item;
  379. }
  380. char *port = field;
  381. size_t port_size = 0;
  382. size_t vhost_size = 0;
  383. if(fields_format[i] == VHOST_WITH_PORT){
  384. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  385. debug_log( "Item %d (type: VHOST_WITH_PORT):%.*s", i, (int)field_size, field);
  386. #endif
  387. if(unlikely(field[0] == '-' && field_size == 1)){
  388. log_line_parsed->vhost[0] = '\0';
  389. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  390. log_line_parsed->parsing_errors++;
  391. goto next_item;
  392. }
  393. while(*port != ':' && vhost_size < field_size) { port++; vhost_size++; };
  394. if(likely(vhost_size < field_size)){
  395. /* ':' detected in string */
  396. port++;
  397. port_size = field_size - vhost_size - 1;
  398. field_size = vhost_size; // now field represents vhost and port is separate
  399. }
  400. else {
  401. /* no ':' detected in string - invalid */
  402. log_line_parsed->vhost[0] = '\0';
  403. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  404. log_line_parsed->parsing_errors++;
  405. goto next_item;
  406. }
  407. }
  408. if(fields_format[i] == VHOST_WITH_PORT || fields_format[i] == VHOST){
  409. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  410. debug_log( "Item %d (type: VHOST):%.*s", i, (int)field_size, field);
  411. #endif
  412. if(unlikely(field[0] == '-' && field_size == 1)){
  413. log_line_parsed->vhost[0] = '\0';
  414. log_line_parsed->parsing_errors++;
  415. goto next_item;
  416. }
  417. // TODO: Add below case in code!!!
  418. // nginx $host and $http_host return ipv6 in [], apache doesn't
  419. // TODO: TEST! This case hasn't been tested!
  420. // char *pch = strchr(parsed[i], ']');
  421. // if(pch){
  422. // *pch = '\0';
  423. // memmove(parsed[i], parsed[i]+1, strlen(parsed[i]));
  424. // }
  425. snprintfz(log_line_parsed->vhost, VHOST_MAX_LEN, "%.*s", (int) field_size, field);
  426. if(verify){
  427. // if(field_size >= VHOST_MAX_LEN){
  428. // #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  429. // collector_error("VHOST is invalid");
  430. // #endif
  431. // log_line_parsed->vhost[0] = '\0';
  432. // log_line_parsed->parsing_errors++;
  433. // goto next_item; // TODO: Not entirely right, as it will also skip PORT parsing in case of VHOST_WITH_PORT
  434. // }
  435. if(unlikely(regexec(&vhost_regex, log_line_parsed->vhost, 0, NULL, 0) == REG_NOMATCH)){
  436. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  437. collector_error("VHOST is invalid");
  438. #endif
  439. // log_line_parsed->vhost[0] = 'invalid';
  440. snprintf(log_line_parsed->vhost, sizeof(WEB_LOG_INVALID_HOST_STR), WEB_LOG_INVALID_HOST_STR);
  441. log_line_parsed->parsing_errors++;
  442. }
  443. }
  444. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  445. debug_log( "Extracted VHOST:%s", log_line_parsed->vhost);
  446. #endif
  447. if(fields_format[i] == VHOST) goto next_item;
  448. }
  449. if(fields_format[i] == VHOST_WITH_PORT || fields_format[i] == PORT){
  450. if(fields_format[i] != VHOST_WITH_PORT){
  451. port = field;
  452. port_size = field_size;
  453. }
  454. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  455. debug_log( "Item %d (type: PORT):%.*s", i, (int) port_size, port);
  456. #endif
  457. if(unlikely(port[0] == '-' && port_size == 1)){
  458. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  459. log_line_parsed->parsing_errors++;
  460. goto next_item;
  461. }
  462. char port_d[PORT_MAX_LEN];
  463. snprintfz( port_d, PORT_MAX_LEN, "%.*s", (int) port_size, port);
  464. if(likely(str2int(&log_line_parsed->port, port_d, 10) == STR2XX_SUCCESS)){
  465. if(verify){
  466. if(unlikely(log_line_parsed->port < 80 || log_line_parsed->port > 49151)){
  467. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  468. collector_error("PORT is invalid (<80 or >49151)");
  469. #endif
  470. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  471. log_line_parsed->parsing_errors++;
  472. }
  473. }
  474. }
  475. else{
  476. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  477. collector_error("Error while extracting PORT from string");
  478. #endif
  479. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  480. log_line_parsed->parsing_errors++;
  481. }
  482. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  483. debug_log( "Extracted PORT:%d", log_line_parsed->port);
  484. #endif
  485. goto next_item;
  486. }
  487. if(fields_format[i] == REQ_SCHEME){
  488. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  489. debug_log( "Item %d (type: REQ_SCHEME):%.*s", i, (int)field_size, field);
  490. #endif
  491. if(unlikely(field[0] == '-' && field_size == 1)){
  492. log_line_parsed->req_scheme[0] = '\0';
  493. log_line_parsed->parsing_errors++;
  494. goto next_item;
  495. }
  496. snprintfz(log_line_parsed->req_scheme, REQ_SCHEME_MAX_LEN, "%.*s", (int) field_size, field);
  497. if(verify){
  498. if(unlikely( strcmp(log_line_parsed->req_scheme, "http") &&
  499. strcmp(log_line_parsed->req_scheme, "https"))){
  500. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  501. collector_error("REQ_SCHEME is invalid (must be either 'http' or 'https')");
  502. #endif
  503. log_line_parsed->req_scheme[0] = '\0';
  504. log_line_parsed->parsing_errors++;
  505. }
  506. }
  507. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  508. debug_log( "Extracted REQ_SCHEME:%s", log_line_parsed->req_scheme);
  509. #endif
  510. goto next_item;
  511. }
  512. if(fields_format[i] == REQ_CLIENT){
  513. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  514. debug_log( "Item %d (type: REQ_CLIENT):%.*s", i, (int)field_size, field);
  515. #endif
  516. if(unlikely(field[0] == '-' && field_size == 1)){
  517. log_line_parsed->req_client[0] = '\0';
  518. log_line_parsed->parsing_errors++;
  519. goto next_item;
  520. }
  521. snprintfz(log_line_parsed->req_client, REQ_CLIENT_MAX_LEN, "%.*s", (int)field_size, field);
  522. if(verify){
  523. int regex_rc = regexec(&req_client_regex, log_line_parsed->req_client, 0, NULL, 0);
  524. if (likely(regex_rc == 0)) {/* do nothing */}
  525. else if (unlikely(regex_rc == REG_NOMATCH)) {
  526. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  527. collector_error("REQ_CLIENT is invalid");
  528. #endif
  529. snprintf(log_line_parsed->req_client, REQ_CLIENT_MAX_LEN, "%s", WEB_LOG_INVALID_CLIENT_IP_STR);
  530. log_line_parsed->parsing_errors++;
  531. }
  532. else {
  533. size_t err_msg_size = regerror(regex_rc, &req_client_regex, NULL, 0);
  534. char *err_msg = mallocz(err_msg_size);
  535. regerror(regex_rc, &req_client_regex, err_msg, err_msg_size);
  536. collector_error("req_client_regex error:%s", err_msg);
  537. freez(err_msg);
  538. m_assert(0, "req_client_regex has failed");
  539. }
  540. }
  541. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  542. debug_log( "Extracted REQ_CLIENT:%s", log_line_parsed->req_client);
  543. #endif
  544. goto next_item;
  545. }
  546. if(fields_format[i] == REQ || fields_format[i] == REQ_METHOD){
  547. /* If fields_format[i] == REQ, then field is filled in with request in the previous code */
  548. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  549. debug_log( "Item %d (type: REQ or REQ_METHOD):%.*s", i, (int)field_size, field);
  550. #endif
  551. snprintfz( log_line_parsed->req_method, REQ_METHOD_MAX_LEN, "%.*s", (int)field_size, field);
  552. if(verify){
  553. if( unlikely(
  554. /* GET and POST are the most common requests, so check them first */
  555. strcmp(log_line_parsed->req_method, "GET") &&
  556. strcmp(log_line_parsed->req_method, "POST") &&
  557. strcmp(log_line_parsed->req_method, "ACL") &&
  558. strcmp(log_line_parsed->req_method, "BASELINE-CONTROL") &&
  559. strcmp(log_line_parsed->req_method, "BIND") &&
  560. strcmp(log_line_parsed->req_method, "CHECKIN") &&
  561. strcmp(log_line_parsed->req_method, "CHECKOUT") &&
  562. strcmp(log_line_parsed->req_method, "CONNECT") &&
  563. strcmp(log_line_parsed->req_method, "COPY") &&
  564. strcmp(log_line_parsed->req_method, "DELETE") &&
  565. strcmp(log_line_parsed->req_method, "HEAD") &&
  566. strcmp(log_line_parsed->req_method, "LABEL") &&
  567. strcmp(log_line_parsed->req_method, "LINK") &&
  568. strcmp(log_line_parsed->req_method, "LOCK") &&
  569. strcmp(log_line_parsed->req_method, "MERGE") &&
  570. strcmp(log_line_parsed->req_method, "MKACTIVITY") &&
  571. strcmp(log_line_parsed->req_method, "MKCALENDAR") &&
  572. strcmp(log_line_parsed->req_method, "MKCOL") &&
  573. strcmp(log_line_parsed->req_method, "MKREDIRECTREF") &&
  574. strcmp(log_line_parsed->req_method, "MKWORKSPACE") &&
  575. strcmp(log_line_parsed->req_method, "MOVE") &&
  576. strcmp(log_line_parsed->req_method, "OPTIONS") &&
  577. strcmp(log_line_parsed->req_method, "ORDERPATCH") &&
  578. strcmp(log_line_parsed->req_method, "PATCH") &&
  579. strcmp(log_line_parsed->req_method, "PRI") &&
  580. strcmp(log_line_parsed->req_method, "PROPFIND") &&
  581. strcmp(log_line_parsed->req_method, "PROPPATCH") &&
  582. strcmp(log_line_parsed->req_method, "PUT") &&
  583. strcmp(log_line_parsed->req_method, "REBIND") &&
  584. strcmp(log_line_parsed->req_method, "REPORT") &&
  585. strcmp(log_line_parsed->req_method, "SEARCH") &&
  586. strcmp(log_line_parsed->req_method, "TRACE") &&
  587. strcmp(log_line_parsed->req_method, "UNBIND") &&
  588. strcmp(log_line_parsed->req_method, "UNCHECKOUT") &&
  589. strcmp(log_line_parsed->req_method, "UNLINK") &&
  590. strcmp(log_line_parsed->req_method, "UNLOCK") &&
  591. strcmp(log_line_parsed->req_method, "UPDATE") &&
  592. strcmp(log_line_parsed->req_method, "UPDATEREDIRECTREF") &&
  593. strcmp(log_line_parsed->req_method, "-"))) {
  594. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  595. collector_error("REQ_METHOD is invalid");
  596. #endif
  597. log_line_parsed->req_method[0] = '\0';
  598. log_line_parsed->parsing_errors++;
  599. }
  600. }
  601. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  602. debug_log( "Extracted REQ_METHOD:%s", log_line_parsed->req_method);
  603. #endif
  604. if(fields_format[i] == REQ && field[0] != '-') {
  605. while(*(offset + 1) == delimiter) offset++; // Consume extra whitespace characters
  606. field = ++offset;
  607. while(*offset != delimiter && ((size_t)(offset - line) < line_len)) offset++;
  608. field_size = (size_t) (offset - field);
  609. }
  610. else goto next_item;
  611. }
  612. if(fields_format[i] == REQ || fields_format[i] == REQ_URL){
  613. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  614. debug_log( "Item %d (type: REQ or REQ_URL):%.*s", i, (int)field_size, field);
  615. #endif
  616. snprintfz( log_line_parsed->req_URL, REQ_URL_MAX_LEN, "%.*s", (int)field_size, field);
  617. // if(unlikely(field[0] == '-' && field_size == 1)){
  618. // log_line_parsed->req_method[0] = '\0';
  619. // log_line_parsed->parsing_errors++;
  620. // }
  621. //if(verify){} ??
  622. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  623. debug_log( "Extracted REQ_URL:%s", log_line_parsed->req_URL ? log_line_parsed->req_URL : "NULL!");
  624. #endif
  625. if(fields_format[i] == REQ) {
  626. while(*(offset + 1) == delimiter) offset++; // Consume extra whitespace characters
  627. field = ++offset;
  628. while(*offset != delimiter && ((size_t)(offset - line) < line_len)) offset++;
  629. field_size = (size_t) (offset - field);
  630. }
  631. else goto next_item;
  632. }
  633. if(fields_format[i] == REQ || fields_format[i] == REQ_PROTO){
  634. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  635. debug_log( "Item %d (type: REQ or REQ_PROTO):%.*s", i, (int)field_size, field);
  636. #endif
  637. if(unlikely(field[0] == '-' && field_size == 1)){
  638. log_line_parsed->req_proto[0] = '\0';
  639. log_line_parsed->parsing_errors++;
  640. goto next_item;
  641. }
  642. if(unlikely( field_size > REQ_PROTO_PREF_SIZE + REQ_PROTO_MAX_LEN - 1)){
  643. field_size = REQ_PROTO_PREF_SIZE + REQ_PROTO_MAX_LEN - 1;
  644. }
  645. size_t req_proto_num_size = field_size - REQ_PROTO_PREF_SIZE;
  646. if(verify){
  647. if(unlikely(field_size < 6 ||
  648. req_proto_num_size == 0 ||
  649. strncmp(field, "HTTP/", REQ_PROTO_PREF_SIZE) ||
  650. ( strncmp(&field[REQ_PROTO_PREF_SIZE], "1", req_proto_num_size) &&
  651. strncmp(&field[REQ_PROTO_PREF_SIZE], "1.0", req_proto_num_size) &&
  652. strncmp(&field[REQ_PROTO_PREF_SIZE], "1.1", req_proto_num_size) &&
  653. strncmp(&field[REQ_PROTO_PREF_SIZE], "2", req_proto_num_size) &&
  654. strncmp(&field[REQ_PROTO_PREF_SIZE], "2.0", req_proto_num_size)))) {
  655. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  656. collector_error("REQ_PROTO is invalid");
  657. #endif
  658. log_line_parsed->req_proto[0] = '\0';
  659. log_line_parsed->parsing_errors++;
  660. }
  661. else snprintfz( log_line_parsed->req_proto, req_proto_num_size + 1,
  662. "%.*s", (int)req_proto_num_size, &field[REQ_PROTO_PREF_SIZE]);
  663. }
  664. else snprintfz( log_line_parsed->req_proto, req_proto_num_size + 1,
  665. "%.*s", (int)req_proto_num_size, &field[REQ_PROTO_PREF_SIZE]);
  666. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  667. debug_log( "Extracted REQ_PROTO:%s", log_line_parsed->req_proto);
  668. #endif
  669. goto next_item;
  670. }
  671. if(fields_format[i] == REQ_SIZE){
  672. /* TODO: Differentiate between '-' or 0 and an invalid request size.
  673. * right now, all these will set req_size == 0 */
  674. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  675. debug_log( "Item %d (type: REQ_SIZE):%.*s", i, (int)field_size, field);
  676. #endif
  677. char req_size_d[REQ_SIZE_MAX_LEN];
  678. snprintfz( req_size_d, REQ_SIZE_MAX_LEN, "%.*s", (int) field_size, field);
  679. if(field[0] == '-' && field_size == 1) {
  680. log_line_parsed->req_size = 0; // Request size can be '-'
  681. }
  682. else if(likely(str2int(&log_line_parsed->req_size, req_size_d, 10) == STR2XX_SUCCESS)){
  683. if(verify){
  684. if(unlikely(log_line_parsed->req_size < 0)){
  685. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  686. collector_error("REQ_SIZE is invalid (<0)");
  687. #endif
  688. log_line_parsed->req_size = 0;
  689. log_line_parsed->parsing_errors++;
  690. }
  691. }
  692. }
  693. else{
  694. collector_error("Error while extracting REQ_SIZE from string");
  695. log_line_parsed->req_size = 0;
  696. log_line_parsed->parsing_errors++;
  697. }
  698. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  699. debug_log( "Extracted REQ_SIZE:%d", log_line_parsed->req_size);
  700. #endif
  701. goto next_item;
  702. }
  703. if(fields_format[i] == REQ_PROC_TIME){
  704. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  705. debug_log( "Item %d (type: REQ_PROC_TIME):%.*s", i, (int)field_size, field);
  706. #endif
  707. if(unlikely(field[0] == '-' && field_size == 1)){
  708. log_line_parsed->req_proc_time = WEB_LOG_INVALID_PORT;
  709. log_line_parsed->parsing_errors++;
  710. goto next_item;
  711. }
  712. float f = 0;
  713. char req_proc_time_d[REQ_PROC_TIME_MAX_LEN];
  714. snprintfz( req_proc_time_d, REQ_PROC_TIME_MAX_LEN, "%.*s", (int) field_size, field);
  715. if(memchr(field, '.', field_size)){ // nginx time is in seconds with a milliseconds resolution.
  716. if(likely(str2float(&f, req_proc_time_d) == STR2XX_SUCCESS)){
  717. log_line_parsed->req_proc_time = (int) (f * 1.0E6);
  718. }
  719. else {
  720. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  721. collector_error("Error while extracting REQ_PROC_TIME from string");
  722. #endif
  723. log_line_parsed->req_proc_time = 0;
  724. log_line_parsed->parsing_errors++;
  725. }
  726. }
  727. else{ // apache time is in microseconds
  728. if(unlikely(str2int(&log_line_parsed->req_proc_time, req_proc_time_d, 10) != STR2XX_SUCCESS)) {
  729. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  730. collector_error("Error while extracting REQ_PROC_TIME from string");
  731. #endif
  732. log_line_parsed->req_proc_time = 0;
  733. log_line_parsed->parsing_errors++;
  734. }
  735. }
  736. if(verify){
  737. if(unlikely(log_line_parsed->req_proc_time < 0)){
  738. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  739. collector_error("REQ_PROC_TIME is invalid (<0)");
  740. #endif
  741. log_line_parsed->req_proc_time = 0;
  742. log_line_parsed->parsing_errors++;
  743. }
  744. }
  745. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  746. debug_log( "Extracted REQ_PROC_TIME:%d", log_line_parsed->req_proc_time);
  747. #endif
  748. goto next_item;
  749. }
  750. if(fields_format[i] == RESP_CODE){
  751. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  752. debug_log( "Item %d (type: RESP_CODE):%.*s\n", i, (int)field_size, field);
  753. #endif
  754. if(unlikely(field[0] == '-' && field_size == 1)){
  755. log_line_parsed->resp_code = 0;
  756. log_line_parsed->parsing_errors++;
  757. goto next_item;
  758. }
  759. char resp_code_d[REQ_RESP_CODE_MAX_LEN];
  760. snprintfz( resp_code_d, REQ_RESP_CODE_MAX_LEN, "%.*s", (int)field_size, field);
  761. if(likely(str2int(&log_line_parsed->resp_code, resp_code_d, 10) == STR2XX_SUCCESS)){
  762. if(verify){
  763. /* rfc7231
  764. * Informational responses (100–199),
  765. * Successful responses (200–299),
  766. * Redirects (300–399),
  767. * Client errors (400–499),
  768. * Server errors (500–599). */
  769. if(unlikely(log_line_parsed->resp_code < 100 || log_line_parsed->resp_code > 599)){
  770. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  771. collector_error("RESP_CODE is invalid (<100 or >599)");
  772. #endif
  773. log_line_parsed->resp_code = 0;
  774. log_line_parsed->parsing_errors++;
  775. }
  776. }
  777. }
  778. else{
  779. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  780. collector_error("Error while extracting RESP_CODE from string");
  781. #endif
  782. log_line_parsed->resp_code = 0;
  783. log_line_parsed->parsing_errors++;
  784. }
  785. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  786. debug_log( "Extracted RESP_CODE:%d", log_line_parsed->resp_code);
  787. #endif
  788. goto next_item;
  789. }
  790. if(fields_format[i] == RESP_SIZE){
  791. /* TODO: Differentiate between '-' or 0 and an invalid response size.
  792. * right now, all these will set resp_size == 0 */
  793. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  794. debug_log( "Item %d (type: RESP_SIZE):%.*s", i, (int)field_size, field);
  795. #endif
  796. char resp_size_d[REQ_RESP_SIZE_MAX_LEN];
  797. snprintfz( resp_size_d, REQ_RESP_SIZE_MAX_LEN, "%.*s", (int)field_size, field);
  798. if(field[0] == '-' && field_size == 1) {
  799. log_line_parsed->resp_size = 0; // Response size can be '-'
  800. }
  801. else if(likely(str2int(&log_line_parsed->resp_size, resp_size_d, 10) == STR2XX_SUCCESS)){
  802. if(verify){
  803. if(unlikely(log_line_parsed->resp_size < 0)){
  804. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  805. collector_error("RESP_SIZE is invalid (<0)");
  806. #endif
  807. log_line_parsed->resp_size = 0;
  808. log_line_parsed->parsing_errors++;
  809. }
  810. }
  811. }
  812. else {
  813. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  814. collector_error("Error while extracting RESP_SIZE from string");
  815. #endif
  816. log_line_parsed->resp_size = 0;
  817. log_line_parsed->parsing_errors++;
  818. }
  819. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  820. debug_log( "Extracted RESP_SIZE:%d", log_line_parsed->resp_size);
  821. #endif
  822. goto next_item;
  823. }
  824. if(fields_format[i] == UPS_RESP_TIME){
  825. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  826. debug_log( "Item %d (type: UPS_RESP_TIME):%.*s", i, (int)field_size, field);
  827. #endif
  828. if(field[0] == '-' && field_size == 1) {
  829. log_line_parsed->ups_resp_time = 0;
  830. log_line_parsed->parsing_errors++;
  831. goto next_item;
  832. }
  833. /* Times of several responses are separated by commas and colons. Following the
  834. * Go parser implementation, where only the first one is kept, the others are
  835. * discarded. Also, there must be no space in between them. Needs testing... */
  836. char *pch = memchr(field, ',', field_size);
  837. if(pch) field_size = pch - field;
  838. float f = 0;
  839. char ups_resp_time_d[UPS_RESP_TIME_MAX_LEN];
  840. snprintfz( ups_resp_time_d, UPS_RESP_TIME_MAX_LEN, "%.*s", (int)field_size, field);
  841. if(memchr(field, '.', field_size)){ // nginx time is in seconds with a milliseconds resolution.
  842. if(likely(str2float(&f, ups_resp_time_d) == STR2XX_SUCCESS)){
  843. log_line_parsed->ups_resp_time = (int) (f * 1.0E6);
  844. }
  845. else {
  846. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  847. collector_error("Error while extracting UPS_RESP_TIME from string");
  848. #endif
  849. log_line_parsed->ups_resp_time = 0;
  850. log_line_parsed->parsing_errors++;
  851. }
  852. }
  853. else{ // unlike in the REQ_PROC_TIME case, apache doesn't have an equivalent here
  854. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  855. collector_error("Error while extracting UPS_RESP_TIME from string");
  856. #endif
  857. log_line_parsed->ups_resp_time = 0;
  858. log_line_parsed->parsing_errors++;
  859. }
  860. if(verify){
  861. if(unlikely(log_line_parsed->ups_resp_time < 0)){
  862. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  863. collector_error("UPS_RESP_TIME is invalid (<0)");
  864. #endif
  865. log_line_parsed->ups_resp_time = 0;
  866. log_line_parsed->parsing_errors++;
  867. }
  868. }
  869. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  870. debug_log( "Extracted UPS_RESP_TIME:%d", log_line_parsed->ups_resp_time);
  871. #endif
  872. goto next_item;
  873. }
  874. if(fields_format[i] == SSL_PROTO){
  875. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  876. debug_log( "Item %d (type: SSL_PROTO):%.*s", i, (int)field_size, field);
  877. #endif
  878. if(field[0] == '-' && field_size == 1) {
  879. log_line_parsed->ssl_proto[0] = '\0';
  880. log_line_parsed->parsing_errors++;
  881. goto next_item;
  882. }
  883. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  884. debug_log( "SSL_PROTO field size:%zu", field_size);
  885. #endif
  886. snprintfz( log_line_parsed->ssl_proto, SSL_PROTO_MAX_LEN, "%.*s", (int)field_size, field);
  887. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  888. debug_log( "log_line_parsed->ssl_proto:%s", log_line_parsed->ssl_proto);
  889. #endif
  890. if(verify){
  891. if(unlikely(strcmp(log_line_parsed->ssl_proto, "TLSv1") &&
  892. strcmp(log_line_parsed->ssl_proto, "TLSv1.1") &&
  893. strcmp(log_line_parsed->ssl_proto, "TLSv1.2") &&
  894. strcmp(log_line_parsed->ssl_proto, "TLSv1.3") &&
  895. strcmp(log_line_parsed->ssl_proto, "SSLv2") &&
  896. strcmp(log_line_parsed->ssl_proto, "SSLv3"))) {
  897. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  898. collector_error("SSL_PROTO is invalid");
  899. #endif
  900. log_line_parsed->ssl_proto[0] = '\0';
  901. log_line_parsed->parsing_errors++;
  902. }
  903. }
  904. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  905. debug_log( "Extracted SSL_PROTO:%s", log_line_parsed->ssl_proto);
  906. #endif
  907. goto next_item;
  908. }
  909. if(fields_format[i] == SSL_CIPHER_SUITE){
  910. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  911. debug_log( "Item %d (type: SSL_CIPHER_SUITE):%.*s", i, (int)field_size, field);
  912. #endif
  913. if(field[0] == '-' && field_size == 1) {
  914. log_line_parsed->ssl_cipher[0] = '\0';
  915. log_line_parsed->parsing_errors++;
  916. }
  917. snprintfz( log_line_parsed->ssl_cipher, SSL_CIPHER_SUITE_MAX_LEN, "%.*s", (int)field_size, field);
  918. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  919. debug_log( "before: SSL_CIPHER_SUITE:%s", log_line_parsed->ssl_cipher);
  920. #endif
  921. if(verify){
  922. int regex_rc = regexec(&cipher_suite_regex, log_line_parsed->ssl_cipher, 0, NULL, 0);
  923. if (likely(regex_rc == 0)){/* do nothing */}
  924. else if (unlikely(regex_rc == REG_NOMATCH)) {
  925. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  926. collector_error("SSL_CIPHER_SUITE is invalid");
  927. #endif
  928. log_line_parsed->ssl_cipher[0] = '\0';
  929. log_line_parsed->parsing_errors++;
  930. }
  931. else {
  932. size_t err_msg_size = regerror(regex_rc, &cipher_suite_regex, NULL, 0);
  933. char *err_msg = mallocz(err_msg_size);
  934. regerror(regex_rc, &cipher_suite_regex, err_msg, err_msg_size);
  935. collector_error("cipher_suite_regex error:%s", err_msg);
  936. freez(err_msg);
  937. m_assert(0, "cipher_suite_regex has failed");
  938. }
  939. }
  940. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  941. debug_log( "Extracted SSL_CIPHER_SUITE:%s", log_line_parsed->ssl_cipher);
  942. #endif
  943. goto next_item;
  944. }
  945. if(fields_format[i] == TIME){
  946. if(wblp_config->skip_timestamp_parsing){
  947. while(*offset != ']') {offset++;};
  948. i++;
  949. offset++;
  950. goto next_item;
  951. }
  952. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  953. debug_log( "Item %d (type: TIME - 1st of 2 fields):%.*s", i, (int)field_size, field);
  954. #endif
  955. // TODO: What if TIME is invalid?
  956. // if(field[0] == '-' && field_size == 1) {
  957. // log_line_parsed->timestamp = 0;
  958. // log_line_parsed->parsing_errors++;
  959. // ++i;
  960. // goto next_item;
  961. // }
  962. char *datetime = field;
  963. if(memchr(datetime, '[', field_size)) {
  964. datetime++;
  965. field_size--;
  966. }
  967. struct tm ltm = {0};
  968. char *tz_str = strptime(datetime, "%d/%b/%Y:%H:%M:%S", &ltm);
  969. if(unlikely(tz_str == NULL)){
  970. collector_error("TIME datetime parsing failed");
  971. log_line_parsed->timestamp = 0;
  972. log_line_parsed->parsing_errors++;
  973. goto next_item;
  974. }
  975. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  976. debug_log( "strptime() result: year:%d mon:%d day:%d hour:%d min:%d sec:%d",
  977. ltm.tm_year, ltm.tm_mon, ltm.tm_mday,
  978. ltm.tm_hour, ltm.tm_min, ltm.tm_sec);
  979. #endif
  980. /* Deal with 2nd part of datetime i.e. timezone */
  981. m_assert(*tz_str == ' ', "Invalid TIME timezone");
  982. ++tz_str;
  983. m_assert(*tz_str == '+' || *tz_str == '-', "Invalid TIME timezone");
  984. char tz_sign = *tz_str;
  985. char *tz_str_end = ++tz_str;
  986. while(*tz_str_end != ']') tz_str_end++;
  987. m_assert(tz_str_end - tz_str == 4, "Invalid TIME timezone string length");
  988. char tz_num[4];
  989. memcpy(tz_num, tz_str, tz_str_end - tz_str);
  990. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  991. debug_log( "TIME 2nd part: %.*s", (int)(tz_str_end - tz_str), tz_str);
  992. #endif
  993. long int tz = strtol(tz_str, NULL, 10);
  994. long int tz_h = tz / 100;
  995. long int tz_m = tz % 100;
  996. int64_t tz_adj = (int64_t) tz_h * 3600 + (int64_t) tz_m * 60;
  997. if(tz_sign == '+') tz_adj *= -1; // if timezone is positive, we need to subtract it to get GMT
  998. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  999. debug_log( "Timezone: int:%ld, hrs:%ld, mins:%ld", tz, tz_h, tz_m);
  1000. #endif
  1001. if(-1 == (log_line_parsed->timestamp = timegm(&ltm) + tz_adj)){
  1002. collector_error("TIME datetime parsing failed");
  1003. log_line_parsed->timestamp = 0;
  1004. log_line_parsed->parsing_errors++;
  1005. }
  1006. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  1007. char tb[80];
  1008. strftime(tb, sizeof(tb), "%c", &ltm );
  1009. debug_log( "Extracted TIME:%ld", log_line_parsed->timestamp);
  1010. debug_log( "Extracted TIME string:%s", tb);
  1011. #endif
  1012. offset = tz_str_end + 1; // WARNING! this modifies the offset but it is required in the TIME case.
  1013. ++i; // TIME takes up 2 fields_format[] spaces, so skip the next one
  1014. goto next_item;
  1015. }
  1016. next_item:
  1017. /* If offset is located beyond the end of the line, terminate parsing */
  1018. if(unlikely((size_t) (offset - line) >= line_len)) break;
  1019. field = ++offset;
  1020. }
  1021. }
  1022. /**
  1023. * @brief Extract web log metrics from a group of web log fields.
  1024. * @param[in] parser_config Configuration specifying how and what web log
  1025. * metrics to extract.
  1026. * @param[in] line_parsed Web logs fields extracted from a web log line.
  1027. * @param[out] metrics Web logs metrics exctracted from the \p line_parsed
  1028. * web log fields, using the \p parser_config configuration.
  1029. */
  1030. void extract_web_log_metrics(Log_parser_config_t *parser_config,
  1031. Log_line_parsed_t *line_parsed,
  1032. Web_log_metrics_t *metrics){
  1033. /* Extract number of parsed lines */
  1034. /* NOTE: Commented out as it is done in flb_collect_logs_cb() now. */
  1035. // metrics->num_lines++;
  1036. /* Extract vhost */
  1037. // TODO: Reduce number of reallocs
  1038. if((parser_config->chart_config & CHART_VHOST) && *line_parsed->vhost){
  1039. int i;
  1040. for(i = 0; i < metrics->vhost_arr.size; i++){
  1041. if(!strcmp(metrics->vhost_arr.vhosts[i].name, line_parsed->vhost)){
  1042. metrics->vhost_arr.vhosts[i].count++;
  1043. break;
  1044. }
  1045. }
  1046. if(metrics->vhost_arr.size == i){ // Vhost not found in array - need to append
  1047. metrics->vhost_arr.size++;
  1048. if(metrics->vhost_arr.size >= metrics->vhost_arr.size_max){
  1049. metrics->vhost_arr.size_max = metrics->vhost_arr.size * VHOST_BUFFS_SCALE_FACTOR + 1;
  1050. metrics->vhost_arr.vhosts = reallocz( metrics->vhost_arr.vhosts,
  1051. metrics->vhost_arr.size_max * sizeof(struct log_parser_metrics_vhost));
  1052. }
  1053. snprintf(metrics->vhost_arr.vhosts[metrics->vhost_arr.size - 1].name, VHOST_MAX_LEN, "%s", line_parsed->vhost);
  1054. metrics->vhost_arr.vhosts[metrics->vhost_arr.size - 1].count = 1;
  1055. }
  1056. }
  1057. /* Extract port */
  1058. // TODO: Reduce number of reallocs
  1059. if((parser_config->chart_config & CHART_PORT) && line_parsed->port){
  1060. int i;
  1061. for(i = 0; i < metrics->port_arr.size; i++){
  1062. if(metrics->port_arr.ports[i].port == line_parsed->port){
  1063. metrics->port_arr.ports[i].count++;
  1064. break;
  1065. }
  1066. }
  1067. if(metrics->port_arr.size == i){ // Port not found in array - need to append
  1068. metrics->port_arr.size++;
  1069. if(metrics->port_arr.size >= metrics->port_arr.size_max){
  1070. metrics->port_arr.size_max = metrics->port_arr.size * PORT_BUFFS_SCALE_FACTOR + 1;
  1071. metrics->port_arr.ports = reallocz( metrics->port_arr.ports,
  1072. metrics->port_arr.size_max * sizeof(struct log_parser_metrics_port));
  1073. }
  1074. if(line_parsed->port == WEB_LOG_INVALID_PORT)
  1075. snprintfz(metrics->port_arr.ports[metrics->port_arr.size - 1].name, PORT_MAX_LEN, WEB_LOG_INVALID_PORT_STR);
  1076. else
  1077. snprintfz(metrics->port_arr.ports[metrics->port_arr.size - 1].name, PORT_MAX_LEN, "%d", line_parsed->port);
  1078. metrics->port_arr.ports[metrics->port_arr.size - 1].port = line_parsed->port;
  1079. metrics->port_arr.ports[metrics->port_arr.size - 1].count = 1;
  1080. }
  1081. }
  1082. /* Extract client metrics */
  1083. if(( parser_config->chart_config & ( CHART_IP_VERSION | CHART_REQ_CLIENT_CURRENT | CHART_REQ_CLIENT_ALL_TIME)) && *line_parsed->req_client) {
  1084. /* Invalid IP version */
  1085. if(unlikely(!strcmp(line_parsed->req_client, WEB_LOG_INVALID_CLIENT_IP_STR))){
  1086. if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.invalid++;
  1087. }
  1088. else if(strchr(line_parsed->req_client, ':')){
  1089. /* IPv6 version */
  1090. if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.v6++;
  1091. /* Unique Client IPv6 Address current poll */
  1092. if(parser_config->chart_config & CHART_REQ_CLIENT_CURRENT){
  1093. int i;
  1094. for(i = 0; i < metrics->req_clients_current_arr.ipv6_size; i++){
  1095. if(!strcmp(metrics->req_clients_current_arr.ipv6_req_clients[i], line_parsed->req_client)) break;
  1096. }
  1097. if(metrics->req_clients_current_arr.ipv6_size == i){ // Req client not found in array - need to append
  1098. metrics->req_clients_current_arr.ipv6_size++;
  1099. metrics->req_clients_current_arr.ipv6_req_clients = reallocz(metrics->req_clients_current_arr.ipv6_req_clients,
  1100. metrics->req_clients_current_arr.ipv6_size * sizeof(*metrics->req_clients_current_arr.ipv6_req_clients));
  1101. snprintf(metrics->req_clients_current_arr.ipv6_req_clients[metrics->req_clients_current_arr.ipv6_size - 1],
  1102. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1103. }
  1104. }
  1105. /* Unique Client IPv6 Address all-time */
  1106. if(parser_config->chart_config & CHART_REQ_CLIENT_ALL_TIME){
  1107. int i;
  1108. for(i = 0; i < metrics->req_clients_alltime_arr.ipv6_size; i++){
  1109. if(!strcmp(metrics->req_clients_alltime_arr.ipv6_req_clients[i], line_parsed->req_client)) break;
  1110. }
  1111. if(metrics->req_clients_alltime_arr.ipv6_size == i){ // Req client not found in array - need to append
  1112. metrics->req_clients_alltime_arr.ipv6_size++;
  1113. metrics->req_clients_alltime_arr.ipv6_req_clients = reallocz(metrics->req_clients_alltime_arr.ipv6_req_clients,
  1114. metrics->req_clients_alltime_arr.ipv6_size * sizeof(*metrics->req_clients_alltime_arr.ipv6_req_clients));
  1115. snprintf(metrics->req_clients_alltime_arr.ipv6_req_clients[metrics->req_clients_alltime_arr.ipv6_size - 1],
  1116. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1117. }
  1118. }
  1119. }
  1120. else{
  1121. /* IPv4 version */
  1122. if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.v4++;
  1123. /* Unique Client IPv4 Address current poll */
  1124. if(parser_config->chart_config & CHART_REQ_CLIENT_CURRENT){
  1125. int i;
  1126. for(i = 0; i < metrics->req_clients_current_arr.ipv4_size; i++){
  1127. if(!strcmp(metrics->req_clients_current_arr.ipv4_req_clients[i], line_parsed->req_client)) break;
  1128. }
  1129. if(metrics->req_clients_current_arr.ipv4_size == i){ // Req client not found in array - need to append
  1130. metrics->req_clients_current_arr.ipv4_size++;
  1131. metrics->req_clients_current_arr.ipv4_req_clients = reallocz(metrics->req_clients_current_arr.ipv4_req_clients,
  1132. metrics->req_clients_current_arr.ipv4_size * sizeof(*metrics->req_clients_current_arr.ipv4_req_clients));
  1133. snprintf(metrics->req_clients_current_arr.ipv4_req_clients[metrics->req_clients_current_arr.ipv4_size - 1],
  1134. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1135. }
  1136. }
  1137. /* Unique Client IPv4 Address all-time */
  1138. if(parser_config->chart_config & CHART_REQ_CLIENT_ALL_TIME){
  1139. int i;
  1140. for(i = 0; i < metrics->req_clients_alltime_arr.ipv4_size; i++){
  1141. if(!strcmp(metrics->req_clients_alltime_arr.ipv4_req_clients[i], line_parsed->req_client)) break;
  1142. }
  1143. if(metrics->req_clients_alltime_arr.ipv4_size == i){ // Req client not found in array - need to append
  1144. metrics->req_clients_alltime_arr.ipv4_size++;
  1145. metrics->req_clients_alltime_arr.ipv4_req_clients = reallocz(metrics->req_clients_alltime_arr.ipv4_req_clients,
  1146. metrics->req_clients_alltime_arr.ipv4_size * sizeof(*metrics->req_clients_alltime_arr.ipv4_req_clients));
  1147. snprintf(metrics->req_clients_alltime_arr.ipv4_req_clients[metrics->req_clients_alltime_arr.ipv4_size - 1],
  1148. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1149. }
  1150. }
  1151. }
  1152. }
  1153. /* Extract request method */
  1154. if(parser_config->chart_config & CHART_REQ_METHODS){
  1155. for(int i = 0; i < REQ_METHOD_ARR_SIZE; i++){
  1156. if(!strcmp(line_parsed->req_method, req_method_str[i])){
  1157. metrics->req_method[i]++;
  1158. break;
  1159. }
  1160. }
  1161. }
  1162. /* Extract request protocol */
  1163. if(parser_config->chart_config & CHART_REQ_PROTO){
  1164. if(!strcmp(line_parsed->req_proto, "1") || !strcmp(line_parsed->req_proto, "1.0")) metrics->req_proto.http_1++;
  1165. else if(!strcmp(line_parsed->req_proto, "1.1")) metrics->req_proto.http_1_1++;
  1166. else if(!strcmp(line_parsed->req_proto, "2") || !strcmp(line_parsed->req_proto, "2.0")) metrics->req_proto.http_2++;
  1167. else metrics->req_proto.other++;
  1168. }
  1169. /* Extract bytes received and sent */
  1170. if(parser_config->chart_config & CHART_BANDWIDTH){
  1171. metrics->bandwidth.req_size += line_parsed->req_size;
  1172. metrics->bandwidth.resp_size += line_parsed->resp_size;
  1173. }
  1174. /* Extract request processing time */
  1175. if((parser_config->chart_config & CHART_REQ_PROC_TIME) && line_parsed->req_proc_time){
  1176. if(line_parsed->req_proc_time < metrics->req_proc_time.min || metrics->req_proc_time.min == 0){
  1177. metrics->req_proc_time.min = line_parsed->req_proc_time;
  1178. }
  1179. if(line_parsed->req_proc_time > metrics->req_proc_time.max || metrics->req_proc_time.max == 0){
  1180. metrics->req_proc_time.max = line_parsed->req_proc_time;
  1181. }
  1182. metrics->req_proc_time.sum += line_parsed->req_proc_time;
  1183. metrics->req_proc_time.count++;
  1184. }
  1185. /* Extract response code family, response code & response code type */
  1186. if(parser_config->chart_config & (CHART_RESP_CODE_FAMILY | CHART_RESP_CODE | CHART_RESP_CODE_TYPE)){
  1187. switch(line_parsed->resp_code / 100){
  1188. /* Note: 304 and 401 should be treated as resp_success */
  1189. case 1:
  1190. metrics->resp_code_family.resp_1xx++;
  1191. metrics->resp_code[line_parsed->resp_code - 100]++;
  1192. metrics->resp_code_type.resp_success++;
  1193. break;
  1194. case 2:
  1195. metrics->resp_code_family.resp_2xx++;
  1196. metrics->resp_code[line_parsed->resp_code - 100]++;
  1197. metrics->resp_code_type.resp_success++;
  1198. break;
  1199. case 3:
  1200. metrics->resp_code_family.resp_3xx++;
  1201. metrics->resp_code[line_parsed->resp_code - 100]++;
  1202. if(line_parsed->resp_code == 304) metrics->resp_code_type.resp_success++;
  1203. else metrics->resp_code_type.resp_redirect++;
  1204. break;
  1205. case 4:
  1206. metrics->resp_code_family.resp_4xx++;
  1207. metrics->resp_code[line_parsed->resp_code - 100]++;
  1208. if(line_parsed->resp_code == 401) metrics->resp_code_type.resp_success++;
  1209. else metrics->resp_code_type.resp_bad++;
  1210. break;
  1211. case 5:
  1212. metrics->resp_code_family.resp_5xx++;
  1213. metrics->resp_code[line_parsed->resp_code - 100]++;
  1214. metrics->resp_code_type.resp_error++;
  1215. break;
  1216. default:
  1217. metrics->resp_code_family.other++;
  1218. metrics->resp_code[RESP_CODE_ARR_SIZE - 1]++;
  1219. metrics->resp_code_type.other++;
  1220. break;
  1221. }
  1222. }
  1223. /* Extract SSL protocol */
  1224. if(parser_config->chart_config & CHART_SSL_PROTO){
  1225. if(!strcmp(line_parsed->ssl_proto, "TLSv1")) metrics->ssl_proto.tlsv1++;
  1226. else if(!strcmp(line_parsed->ssl_proto, "TLSv1.1")) metrics->ssl_proto.tlsv1_1++;
  1227. else if(!strcmp(line_parsed->ssl_proto, "TLSv1.2")) metrics->ssl_proto.tlsv1_2++;
  1228. else if(!strcmp(line_parsed->ssl_proto, "TLSv1.3")) metrics->ssl_proto.tlsv1_3++;
  1229. else if(!strcmp(line_parsed->ssl_proto, "SSLv2")) metrics->ssl_proto.sslv2++;
  1230. else if(!strcmp(line_parsed->ssl_proto, "SSLv3")) metrics->ssl_proto.sslv3++;
  1231. else metrics->ssl_proto.other++;
  1232. }
  1233. /* Extract SSL cipher suite */
  1234. // TODO: Reduce number of reallocs
  1235. if((parser_config->chart_config & CHART_SSL_CIPHER) && *line_parsed->ssl_cipher){
  1236. int i;
  1237. for(i = 0; i < metrics->ssl_cipher_arr.size; i++){
  1238. if(!strcmp(metrics->ssl_cipher_arr.ssl_ciphers[i].name, line_parsed->ssl_cipher)){
  1239. metrics->ssl_cipher_arr.ssl_ciphers[i].count++;
  1240. break;
  1241. }
  1242. }
  1243. if(metrics->ssl_cipher_arr.size == i){ // SSL cipher suite not found in array - need to append
  1244. metrics->ssl_cipher_arr.size++;
  1245. metrics->ssl_cipher_arr.ssl_ciphers = reallocz(metrics->ssl_cipher_arr.ssl_ciphers,
  1246. metrics->ssl_cipher_arr.size * sizeof(struct log_parser_metrics_ssl_cipher));
  1247. snprintf( metrics->ssl_cipher_arr.ssl_ciphers[metrics->ssl_cipher_arr.size - 1].name,
  1248. SSL_CIPHER_SUITE_MAX_LEN, "%s", line_parsed->ssl_cipher);
  1249. metrics->ssl_cipher_arr.ssl_ciphers[metrics->ssl_cipher_arr.size - 1].count = 1;
  1250. }
  1251. }
  1252. metrics->timestamp = line_parsed->timestamp;
  1253. }
  1254. /**
  1255. * @brief Try to automatically detect the configuration for a web log parser.
  1256. * @details It tries to automatically detect the configuration to be used for
  1257. * a web log parser, by parsing a single web log line record and trying to pick
  1258. * a matching configuration (from a static list of predefined ones.)
  1259. * @param[in] line Null-terminated web log line to use in guessing the configuration.
  1260. * @param[in] delimiter Delimiter used to break down \p line in separate fields.
  1261. * @returns Pointer to the web log parser configuration if automatic detection
  1262. * was sucessful, otherwise NULL.
  1263. */
  1264. Web_log_parser_config_t *auto_detect_web_log_parser_config(char *line, const char delimiter){
  1265. for(int i = 0; csv_auto_format_guess_matrix[i] != NULL; i++){
  1266. Web_log_parser_config_t *wblp_config = read_web_log_parser_config(csv_auto_format_guess_matrix[i], delimiter);
  1267. if(count_fields(line, delimiter) == wblp_config->num_fields){
  1268. wblp_config->verify_parsed_logs = 1; // Verification must be turned on to be able to pick up parsing_errors
  1269. Log_line_parsed_t line_parsed = (Log_line_parsed_t) {0};
  1270. parse_web_log_line(wblp_config, line, strlen(line), &line_parsed);
  1271. if(line_parsed.parsing_errors == 0){
  1272. return wblp_config;
  1273. }
  1274. }
  1275. freez(wblp_config->fields);
  1276. freez(wblp_config);
  1277. }
  1278. return NULL;
  1279. }