parser.c 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. /** @file parser.c
  3. * @brief API to parse and search logs
  4. */
  5. #if !defined(_XOPEN_SOURCE) && !defined(__DARWIN__) && !defined(__APPLE__) && !defined(__FreeBSD__)
  6. /* _XOPEN_SOURCE 700 required by strptime (POSIX 2004) and strndup (POSIX 2008)
  7. * Will need to find a cleaner way of doing this, as currently defining
  8. * _XOPEN_SOURCE 700 can cause issues on Centos 7, MacOS and FreeBSD too. */
  9. #define _XOPEN_SOURCE 700
  10. /* _BSD_SOURCE (glibc <= 2.19) and _DEFAULT_SOURCE (glibc >= 2.20) are required
  11. * to silence "warning: implicit declaration of function ‘strsep’;" that is
  12. * included through libnetdata/inlined.h. */
  13. #define _BSD_SOURCE
  14. #define _DEFAULT_SOURCE
  15. #include <time.h>
  16. #endif
  17. #include "parser.h"
  18. #include "helper.h"
  19. #include <stdio.h>
  20. #include <sys/resource.h>
  21. #include <math.h>
  22. #include <string.h>
  23. static regex_t vhost_regex, req_client_regex, cipher_suite_regex;
  24. const char* const csv_auto_format_guess_matrix[] = {
  25. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time", // csvVhostCustom4
  26. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time", // csvVhostCustom3
  27. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - -", // csvVhostCombined
  28. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time $upstream_response_time", // csvVhostCustom2
  29. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time", // csvVhostCustom1
  30. "$host:$server_port $remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", // csvVhostCommon
  31. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time $upstream_response_time", // csvCustom4
  32. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - - $request_length $request_time", // csvCustom3
  33. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent - -", // csvCombined
  34. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time $upstream_response_time", // csvCustom2
  35. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent $request_length $request_time", // csvCustom1
  36. "$remote_addr - - [$time_local] \"$request\" $status $body_bytes_sent", // csvCommon
  37. NULL}
  38. ;
  39. UNIT_STATIC int count_fields(const char *line, const char delimiter){
  40. const char *ptr;
  41. int cnt, fQuote;
  42. for (cnt = 1, fQuote = 0, ptr = line; *ptr != '\n' && *ptr != '\r' && *ptr != '\0'; ptr++ ){
  43. if (fQuote) {
  44. if (*ptr == '\"') {
  45. if ( ptr[1] == '\"' ) {
  46. ptr++;
  47. continue;
  48. }
  49. fQuote = 0;
  50. }
  51. continue;
  52. }
  53. if(*ptr == '\"'){
  54. fQuote = 1;
  55. continue;
  56. }
  57. if(*ptr == delimiter){
  58. cnt++;
  59. while(*(ptr+1) == delimiter) ptr++;
  60. continue;
  61. }
  62. }
  63. if (fQuote) {
  64. return -1;
  65. }
  66. return cnt;
  67. }
  68. /**
  69. * @brief Parse a delimited string into an array of strings.
  70. * @details Given a string containing no linebreaks, or containing line breaks
  71. * which are escaped by "double quotes", extract a NULL-terminated
  72. * array of strings, one for every delimiter-separated value in the row.
  73. * @param[in] line The input string to be parsed.
  74. * @param[in] delimiter The delimiter to be used to split the string.
  75. * @param[in] num_fields The expected number of fields in \p line. If a negative
  76. * number is provided, they will be counted.
  77. * @return A NULL-terminated array of strings with the delimited values in \p line,
  78. * or NULL in any other case.
  79. * @todo This function has not been benchmarked or optimised.
  80. */
  81. static inline char **parse_csv( const char *line, const char delimiter, int num_fields) {
  82. char **buf, **bptr, *tmp, *tptr;
  83. const char *ptr;
  84. int fQuote, fEnd;
  85. if(num_fields < 0){
  86. num_fields = count_fields(line, delimiter);
  87. if ( num_fields == -1 ) {
  88. return NULL;
  89. }
  90. }
  91. buf = mallocz( sizeof(char*) * (num_fields+1) );
  92. tmp = mallocz( strlen(line) + 1 );
  93. bptr = buf;
  94. for ( ptr = line, fQuote = 0, *tmp = '\0', tptr = tmp, fEnd = 0; ; ptr++ ) {
  95. if ( fQuote ) {
  96. if ( !*ptr ) {
  97. break;
  98. }
  99. if ( *ptr == '\"' ) {
  100. if ( ptr[1] == '\"' ) {
  101. *tptr++ = '\"';
  102. ptr++;
  103. continue;
  104. }
  105. fQuote = 0;
  106. }
  107. else {
  108. *tptr++ = *ptr;
  109. }
  110. continue;
  111. }
  112. if(*ptr == '\"'){
  113. fQuote = 1;
  114. continue;
  115. }
  116. else if(*ptr == '\0'){
  117. fEnd = 1;
  118. *tptr = '\0';
  119. *bptr = strdupz( tmp );
  120. if ( !*bptr ) {
  121. for ( bptr--; bptr >= buf; bptr-- ) {
  122. freez( *bptr );
  123. }
  124. freez( buf );
  125. freez( tmp );
  126. return NULL;
  127. }
  128. bptr++;
  129. tptr = tmp;
  130. break;
  131. }
  132. else if(*ptr == delimiter){
  133. *tptr = '\0';
  134. *bptr = strdupz( tmp );
  135. if ( !*bptr ) {
  136. for ( bptr--; bptr >= buf; bptr-- ) {
  137. freez( *bptr );
  138. }
  139. freez( buf );
  140. freez( tmp );
  141. return NULL;
  142. }
  143. bptr++;
  144. tptr = tmp;
  145. continue;
  146. }
  147. else{
  148. *tptr++ = *ptr;
  149. continue;
  150. }
  151. if ( fEnd ) {
  152. break;
  153. }
  154. }
  155. *bptr = NULL;
  156. freez( tmp );
  157. return buf;
  158. }
  159. /**
  160. * @brief Search a buffer for a keyword (or regular expression)
  161. * @details Search the source buffer for a keyword (or regular expression) and
  162. * copy matches to the destination buffer.
  163. * @param[in] src The source buffer to be searched
  164. * @param[in] src_sz Size of \p src
  165. * @param[in, out] dest The destination buffer where the results will be
  166. * written out to. If NULL, the results will just be discarded.
  167. * @param[out] dest_sz Size of \p dest
  168. * @param[in] keyword The keyword or pattern to be searched in the src buffer
  169. * @param[in] regex The precompiled regular expression to be search in the
  170. * src buffer. If NULL, \p keyword will be used instead.
  171. * @param[in] ignore_case Perform case insensitive search if 1.
  172. * @return Number of matches, or -1 in case of error
  173. */
  174. int search_keyword( char *src, size_t src_sz __maybe_unused,
  175. char *dest, size_t *dest_sz,
  176. const char *keyword, regex_t *regex,
  177. const int ignore_case){
  178. m_assert(src[src_sz - 1] == '\0', "src[src_sz - 1] should be '\0' but it's not");
  179. m_assert((dest && dest_sz) || (!dest && !dest_sz), "either both dest and dest_sz exist, or none does");
  180. if(unlikely(dest && !dest_sz))
  181. return -1;
  182. regex_t regex_compiled;
  183. if(regex)
  184. regex_compiled = *regex;
  185. else{
  186. char regexString[MAX_REGEX_SIZE];
  187. const int regex_flags = ignore_case ? REG_EXTENDED | REG_NEWLINE | REG_ICASE : REG_EXTENDED | REG_NEWLINE;
  188. snprintf(regexString, MAX_REGEX_SIZE, ".*(%s).*", keyword);
  189. int rc;
  190. if (unlikely((rc = regcomp(&regex_compiled, regexString, regex_flags)))){
  191. size_t regcomp_err_str_size = regerror(rc, &regex_compiled, 0, 0);
  192. char *regcomp_err_str = mallocz(regcomp_err_str_size);
  193. regerror(rc, &regex_compiled, regcomp_err_str, regcomp_err_str_size);
  194. fatal("Could not compile regular expression:%.*s, error: %s", (int) MAX_REGEX_SIZE, regexString, regcomp_err_str);
  195. }
  196. }
  197. regmatch_t groupArray[1];
  198. int matches = 0;
  199. char *cursor = src;
  200. if(dest_sz)
  201. *dest_sz = 0;
  202. for ( ; ; matches++){
  203. if (regexec(&regex_compiled, cursor, 1, groupArray, REG_NOTBOL | REG_NOTEOL))
  204. break; // No more matches
  205. if (groupArray[0].rm_so == -1)
  206. break; // No more groups
  207. size_t match_len = (size_t) (groupArray[0].rm_eo - groupArray[0].rm_so);
  208. // debug_log( "Match %d [%2d-%2d]:%.*s\n", matches, groupArray[0].rm_so,
  209. // groupArray[0].rm_eo, (int) match_len, cursor + groupArray[0].rm_so);
  210. if(dest && dest_sz){
  211. memcpy( &dest[*dest_sz], cursor + groupArray[0].rm_so, match_len);
  212. *dest_sz += match_len + 1;
  213. dest[*dest_sz - 1] = '\n';
  214. }
  215. cursor += groupArray[0].rm_eo;
  216. }
  217. if(!regex)
  218. regfree(&regex_compiled);
  219. return matches;
  220. }
  221. /**
  222. * @brief Extract web log parser configuration from string
  223. * @param[in] log_format String that describes the log format
  224. * @param[in] delimiter Delimiter to be used when parsing a CSV log format
  225. * @return Pointer to struct that contains the extracted log format
  226. * configuration or NULL if no fields found in log_format.
  227. */
  228. Web_log_parser_config_t *read_web_log_parser_config(const char *log_format, const char delimiter){
  229. int num_fields = count_fields(log_format, delimiter);
  230. if(num_fields <= 0) return NULL;
  231. /* If first execution of this function, initialise regexs */
  232. static int regexs_initialised = 0;
  233. // TODO: Tests needed for following regexs.
  234. if(!regexs_initialised){
  235. assert(regcomp(&vhost_regex, "^[a-zA-Z0-9:.-]+$", REG_NOSUB | REG_EXTENDED) == 0);
  236. assert(regcomp(&req_client_regex, "^([0-9a-f:.]+|localhost)$", REG_NOSUB | REG_EXTENDED) == 0);
  237. assert(regcomp(&cipher_suite_regex, "^[A-Z0-9_-]+$", REG_NOSUB | REG_EXTENDED) == 0);
  238. regexs_initialised = 1;
  239. }
  240. Web_log_parser_config_t *wblp_config = callocz(1, sizeof(Web_log_parser_config_t));
  241. wblp_config->num_fields = num_fields;
  242. wblp_config->delimiter = delimiter;
  243. char **parsed_format = parse_csv(log_format, delimiter, num_fields); // parsed_format is NULL-terminated
  244. wblp_config->fields = callocz(num_fields, sizeof(web_log_line_field_t));
  245. unsigned int fields_off = 0;
  246. for(int i = 0; i < num_fields; i++ ){
  247. if(strcmp(parsed_format[i], "$host:$server_port") == 0 ||
  248. strcmp(parsed_format[i], "%v:%p") == 0) {
  249. wblp_config->fields[fields_off++] = VHOST_WITH_PORT;
  250. continue;
  251. }
  252. if(strcmp(parsed_format[i], "$host") == 0 ||
  253. strcmp(parsed_format[i], "$http_host") == 0 ||
  254. strcmp(parsed_format[i], "%v") == 0) {
  255. wblp_config->fields[fields_off++] = VHOST;
  256. continue;
  257. }
  258. if(strcmp(parsed_format[i], "$server_port") == 0 ||
  259. strcmp(parsed_format[i], "%p") == 0) {
  260. wblp_config->fields[fields_off++] = PORT;
  261. continue;
  262. }
  263. if(strcmp(parsed_format[i], "$scheme") == 0) {
  264. wblp_config->fields[fields_off++] = REQ_SCHEME;
  265. continue;
  266. }
  267. if(strcmp(parsed_format[i], "$remote_addr") == 0 ||
  268. strcmp(parsed_format[i], "%a") == 0 ||
  269. strcmp(parsed_format[i], "%h") == 0) {
  270. wblp_config->fields[fields_off++] = REQ_CLIENT;
  271. continue;
  272. }
  273. if(strcmp(parsed_format[i], "$request") == 0 ||
  274. strcmp(parsed_format[i], "%r") == 0) {
  275. wblp_config->fields[fields_off++] = REQ;
  276. continue;
  277. }
  278. if(strcmp(parsed_format[i], "$request_method") == 0 ||
  279. strcmp(parsed_format[i], "%m") == 0) {
  280. wblp_config->fields[fields_off++] = REQ_METHOD;
  281. continue;
  282. }
  283. if(strcmp(parsed_format[i], "$request_uri") == 0 ||
  284. strcmp(parsed_format[i], "%U") == 0) {
  285. wblp_config->fields[fields_off++] = REQ_URL;
  286. continue;
  287. }
  288. if(strcmp(parsed_format[i], "$server_protocol") == 0 ||
  289. strcmp(parsed_format[i], "%H") == 0) {
  290. wblp_config->fields[fields_off++] = REQ_PROTO;
  291. continue;
  292. }
  293. if(strcmp(parsed_format[i], "$request_length") == 0 ||
  294. strcmp(parsed_format[i], "%I") == 0) {
  295. wblp_config->fields[fields_off++] = REQ_SIZE;
  296. continue;
  297. }
  298. if(strcmp(parsed_format[i], "$request_time") == 0 ||
  299. strcmp(parsed_format[i], "%D") == 0) {
  300. wblp_config->fields[fields_off++] = REQ_PROC_TIME;
  301. continue;
  302. }
  303. if(strcmp(parsed_format[i], "$status") == 0 ||
  304. strcmp(parsed_format[i], "%>s") == 0 ||
  305. strcmp(parsed_format[i], "%s") == 0) {
  306. wblp_config->fields[fields_off++] = RESP_CODE;
  307. continue;
  308. }
  309. if(strcmp(parsed_format[i], "$bytes_sent") == 0 ||
  310. strcmp(parsed_format[i], "$body_bytes_sent") == 0 ||
  311. strcmp(parsed_format[i], "%b") == 0 ||
  312. strcmp(parsed_format[i], "%O") == 0 ||
  313. strcmp(parsed_format[i], "%B") == 0) {
  314. wblp_config->fields[fields_off++] = RESP_SIZE;
  315. continue;
  316. }
  317. if(strcmp(parsed_format[i], "$upstream_response_time") == 0) {
  318. wblp_config->fields[fields_off++] = UPS_RESP_TIME;
  319. continue;
  320. }
  321. if(strcmp(parsed_format[i], "$ssl_protocol") == 0) {
  322. wblp_config->fields[fields_off++] = SSL_PROTO;
  323. continue;
  324. }
  325. if(strcmp(parsed_format[i], "$ssl_cipher") == 0) {
  326. wblp_config->fields[fields_off++] = SSL_CIPHER_SUITE;
  327. continue;
  328. }
  329. if(strcmp(parsed_format[i], "$time_local") == 0 || strcmp(parsed_format[i], "[$time_local]") == 0 ||
  330. strcmp(parsed_format[i], "%t") == 0 || strcmp(parsed_format[i], "[%t]") == 0) {
  331. wblp_config->fields = reallocz(wblp_config->fields, (num_fields + 1) * sizeof(web_log_line_field_t));
  332. wblp_config->fields[fields_off++] = TIME;
  333. wblp_config->fields[fields_off++] = TIME; // TIME takes 2 fields
  334. wblp_config->num_fields++; // TIME takes 2 fields
  335. continue;
  336. }
  337. wblp_config->fields[fields_off++] = CUSTOM;
  338. }
  339. for(int i = 0; parsed_format[i] != NULL; i++)
  340. freez(parsed_format[i]);
  341. freez(parsed_format);
  342. return wblp_config;
  343. }
  344. /**
  345. * @brief Parse a web log line to extract individual fields.
  346. * @param[in] wblp_config Configuration that specifies how to parse the line.
  347. * @param[in] line Web log record to be parsed. '\n', '\r' or '\0' terminated.
  348. * @param[out] log_line_parsed Struct that stores the results of parsing.
  349. */
  350. void parse_web_log_line(const Web_log_parser_config_t *wblp_config,
  351. char *line, size_t line_len,
  352. Log_line_parsed_t *log_line_parsed){
  353. /* Read parsing configuration */
  354. web_log_line_field_t *fields_format = wblp_config->fields;
  355. const int num_fields_config = wblp_config->num_fields;
  356. const char delimiter = wblp_config->delimiter;
  357. const int verify = wblp_config->verify_parsed_logs;
  358. /* Consume new lines and spaces at end of line */
  359. for(; line[line_len-1] == '\n' || line[line_len-1] == '\r' || line[line_len-1] == ' '; line_len--);
  360. char *field = line;
  361. char *offset = line;
  362. size_t field_size = 0;
  363. for(int i = 0; i < num_fields_config; i++ ){
  364. /* Consume double quotes and extra delimiters at beginning of field */
  365. while(*field == '"' || *field == delimiter) field++, offset++;
  366. /* Find offset boundaries of next field in line */
  367. while(((size_t)(offset - line) < line_len) && *offset != delimiter) offset++;
  368. if(unlikely(*(offset - 1) == '"')) offset--;
  369. field_size = (size_t) (offset - field);
  370. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  371. debug_log( "Field[%d]:%.*s", i, (int)field_size, field);
  372. #endif
  373. if(fields_format[i] == CUSTOM){
  374. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  375. debug_log( "Item %d (type: CUSTOM or UNKNOWN):%.*s", i, (int)field_size, field);
  376. #endif
  377. goto next_item;
  378. }
  379. char *port = field;
  380. size_t port_size = 0;
  381. size_t vhost_size = 0;
  382. if(fields_format[i] == VHOST_WITH_PORT){
  383. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  384. debug_log( "Item %d (type: VHOST_WITH_PORT):%.*s", i, (int)field_size, field);
  385. #endif
  386. if(unlikely(field[0] == '-' && field_size == 1)){
  387. log_line_parsed->vhost[0] = '\0';
  388. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  389. log_line_parsed->parsing_errors++;
  390. goto next_item;
  391. }
  392. while(*port != ':' && vhost_size < field_size) { port++; vhost_size++; }
  393. if(likely(vhost_size < field_size)) {
  394. /* ':' detected in string */
  395. port++;
  396. port_size = field_size - vhost_size - 1;
  397. field_size = vhost_size; // now field represents vhost and port is separate
  398. }
  399. else {
  400. /* no ':' detected in string - invalid */
  401. log_line_parsed->vhost[0] = '\0';
  402. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  403. log_line_parsed->parsing_errors++;
  404. goto next_item;
  405. }
  406. }
  407. if(fields_format[i] == VHOST_WITH_PORT || fields_format[i] == VHOST){
  408. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  409. debug_log( "Item %d (type: VHOST):%.*s", i, (int)field_size, field);
  410. #endif
  411. if(unlikely(field[0] == '-' && field_size == 1)){
  412. log_line_parsed->vhost[0] = '\0';
  413. log_line_parsed->parsing_errors++;
  414. goto next_item;
  415. }
  416. // TODO: Add below case in code!!!
  417. // nginx $host and $http_host return ipv6 in [], apache doesn't
  418. // TODO: TEST! This case hasn't been tested!
  419. // char *pch = strchr(parsed[i], ']');
  420. // if(pch){
  421. // *pch = '\0';
  422. // memmove(parsed[i], parsed[i]+1, strlen(parsed[i]));
  423. // }
  424. snprintfz(log_line_parsed->vhost, VHOST_MAX_LEN, "%.*s", (int) field_size, field);
  425. if(verify){
  426. // if(field_size >= VHOST_MAX_LEN){
  427. // #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  428. // collector_error("VHOST is invalid");
  429. // #endif
  430. // log_line_parsed->vhost[0] = '\0';
  431. // log_line_parsed->parsing_errors++;
  432. // goto next_item; // TODO: Not entirely right, as it will also skip PORT parsing in case of VHOST_WITH_PORT
  433. // }
  434. if(unlikely(regexec(&vhost_regex, log_line_parsed->vhost, 0, NULL, 0) == REG_NOMATCH)){
  435. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  436. collector_error("VHOST is invalid");
  437. #endif
  438. // log_line_parsed->vhost[0] = 'invalid';
  439. snprintf(log_line_parsed->vhost, sizeof(WEB_LOG_INVALID_HOST_STR), WEB_LOG_INVALID_HOST_STR);
  440. log_line_parsed->parsing_errors++;
  441. }
  442. }
  443. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  444. debug_log( "Extracted VHOST:%s", log_line_parsed->vhost);
  445. #endif
  446. if(fields_format[i] == VHOST) goto next_item;
  447. }
  448. if(fields_format[i] == VHOST_WITH_PORT || fields_format[i] == PORT){
  449. if(fields_format[i] != VHOST_WITH_PORT){
  450. port = field;
  451. port_size = field_size;
  452. }
  453. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  454. debug_log( "Item %d (type: PORT):%.*s", i, (int) port_size, port);
  455. #endif
  456. if(unlikely(port[0] == '-' && port_size == 1)){
  457. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  458. log_line_parsed->parsing_errors++;
  459. goto next_item;
  460. }
  461. char port_d[PORT_MAX_LEN];
  462. snprintfz( port_d, PORT_MAX_LEN, "%.*s", (int) port_size, port);
  463. if(likely(str2int(&log_line_parsed->port, port_d, 10) == STR2XX_SUCCESS)){
  464. if(verify){
  465. if(unlikely(log_line_parsed->port < 80 || log_line_parsed->port > 49151)){
  466. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  467. collector_error("PORT is invalid (<80 or >49151)");
  468. #endif
  469. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  470. log_line_parsed->parsing_errors++;
  471. }
  472. }
  473. }
  474. else{
  475. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  476. collector_error("Error while extracting PORT from string");
  477. #endif
  478. log_line_parsed->port = WEB_LOG_INVALID_PORT;
  479. log_line_parsed->parsing_errors++;
  480. }
  481. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  482. debug_log( "Extracted PORT:%d", log_line_parsed->port);
  483. #endif
  484. goto next_item;
  485. }
  486. if(fields_format[i] == REQ_SCHEME){
  487. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  488. debug_log( "Item %d (type: REQ_SCHEME):%.*s", i, (int)field_size, field);
  489. #endif
  490. if(unlikely(field[0] == '-' && field_size == 1)){
  491. log_line_parsed->req_scheme[0] = '\0';
  492. log_line_parsed->parsing_errors++;
  493. goto next_item;
  494. }
  495. snprintfz(log_line_parsed->req_scheme, REQ_SCHEME_MAX_LEN, "%.*s", (int) field_size, field);
  496. if(verify){
  497. if(unlikely( strcmp(log_line_parsed->req_scheme, "http") &&
  498. strcmp(log_line_parsed->req_scheme, "https"))){
  499. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  500. collector_error("REQ_SCHEME is invalid (must be either 'http' or 'https')");
  501. #endif
  502. log_line_parsed->req_scheme[0] = '\0';
  503. log_line_parsed->parsing_errors++;
  504. }
  505. }
  506. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  507. debug_log( "Extracted REQ_SCHEME:%s", log_line_parsed->req_scheme);
  508. #endif
  509. goto next_item;
  510. }
  511. if(fields_format[i] == REQ_CLIENT){
  512. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  513. debug_log( "Item %d (type: REQ_CLIENT):%.*s", i, (int)field_size, field);
  514. #endif
  515. if(unlikely(field[0] == '-' && field_size == 1)){
  516. log_line_parsed->req_client[0] = '\0';
  517. log_line_parsed->parsing_errors++;
  518. goto next_item;
  519. }
  520. snprintfz(log_line_parsed->req_client, REQ_CLIENT_MAX_LEN, "%.*s", (int)field_size, field);
  521. if(verify){
  522. int regex_rc = regexec(&req_client_regex, log_line_parsed->req_client, 0, NULL, 0);
  523. if (likely(regex_rc == 0)) {/* do nothing */}
  524. else if (unlikely(regex_rc == REG_NOMATCH)) {
  525. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  526. collector_error("REQ_CLIENT is invalid");
  527. #endif
  528. snprintf(log_line_parsed->req_client, REQ_CLIENT_MAX_LEN, "%s", WEB_LOG_INVALID_CLIENT_IP_STR);
  529. log_line_parsed->parsing_errors++;
  530. }
  531. else {
  532. size_t err_msg_size = regerror(regex_rc, &req_client_regex, NULL, 0);
  533. char *err_msg = mallocz(err_msg_size);
  534. regerror(regex_rc, &req_client_regex, err_msg, err_msg_size);
  535. collector_error("req_client_regex error:%s", err_msg);
  536. freez(err_msg);
  537. m_assert(0, "req_client_regex has failed");
  538. }
  539. }
  540. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  541. debug_log( "Extracted REQ_CLIENT:%s", log_line_parsed->req_client);
  542. #endif
  543. goto next_item;
  544. }
  545. if(fields_format[i] == REQ || fields_format[i] == REQ_METHOD){
  546. /* If fields_format[i] == REQ, then field is filled in with request in the previous code */
  547. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  548. debug_log( "Item %d (type: REQ or REQ_METHOD):%.*s", i, (int)field_size, field);
  549. #endif
  550. snprintfz( log_line_parsed->req_method, REQ_METHOD_MAX_LEN, "%.*s", (int)field_size, field);
  551. if(verify){
  552. if( unlikely(
  553. /* GET and POST are the most common requests, so check them first */
  554. strcmp(log_line_parsed->req_method, "GET") &&
  555. strcmp(log_line_parsed->req_method, "POST") &&
  556. strcmp(log_line_parsed->req_method, "ACL") &&
  557. strcmp(log_line_parsed->req_method, "BASELINE-CONTROL") &&
  558. strcmp(log_line_parsed->req_method, "BIND") &&
  559. strcmp(log_line_parsed->req_method, "CHECKIN") &&
  560. strcmp(log_line_parsed->req_method, "CHECKOUT") &&
  561. strcmp(log_line_parsed->req_method, "CONNECT") &&
  562. strcmp(log_line_parsed->req_method, "COPY") &&
  563. strcmp(log_line_parsed->req_method, "DELETE") &&
  564. strcmp(log_line_parsed->req_method, "HEAD") &&
  565. strcmp(log_line_parsed->req_method, "LABEL") &&
  566. strcmp(log_line_parsed->req_method, "LINK") &&
  567. strcmp(log_line_parsed->req_method, "LOCK") &&
  568. strcmp(log_line_parsed->req_method, "MERGE") &&
  569. strcmp(log_line_parsed->req_method, "MKACTIVITY") &&
  570. strcmp(log_line_parsed->req_method, "MKCALENDAR") &&
  571. strcmp(log_line_parsed->req_method, "MKCOL") &&
  572. strcmp(log_line_parsed->req_method, "MKREDIRECTREF") &&
  573. strcmp(log_line_parsed->req_method, "MKWORKSPACE") &&
  574. strcmp(log_line_parsed->req_method, "MOVE") &&
  575. strcmp(log_line_parsed->req_method, "OPTIONS") &&
  576. strcmp(log_line_parsed->req_method, "ORDERPATCH") &&
  577. strcmp(log_line_parsed->req_method, "PATCH") &&
  578. strcmp(log_line_parsed->req_method, "PRI") &&
  579. strcmp(log_line_parsed->req_method, "PROPFIND") &&
  580. strcmp(log_line_parsed->req_method, "PROPPATCH") &&
  581. strcmp(log_line_parsed->req_method, "PUT") &&
  582. strcmp(log_line_parsed->req_method, "REBIND") &&
  583. strcmp(log_line_parsed->req_method, "REPORT") &&
  584. strcmp(log_line_parsed->req_method, "SEARCH") &&
  585. strcmp(log_line_parsed->req_method, "TRACE") &&
  586. strcmp(log_line_parsed->req_method, "UNBIND") &&
  587. strcmp(log_line_parsed->req_method, "UNCHECKOUT") &&
  588. strcmp(log_line_parsed->req_method, "UNLINK") &&
  589. strcmp(log_line_parsed->req_method, "UNLOCK") &&
  590. strcmp(log_line_parsed->req_method, "UPDATE") &&
  591. strcmp(log_line_parsed->req_method, "UPDATEREDIRECTREF") &&
  592. strcmp(log_line_parsed->req_method, "-"))) {
  593. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  594. collector_error("REQ_METHOD is invalid");
  595. #endif
  596. log_line_parsed->req_method[0] = '\0';
  597. log_line_parsed->parsing_errors++;
  598. }
  599. }
  600. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  601. debug_log( "Extracted REQ_METHOD:%s", log_line_parsed->req_method);
  602. #endif
  603. if(fields_format[i] == REQ && field[0] != '-') {
  604. while(*(offset + 1) == delimiter) offset++; // Consume extra whitespace characters
  605. field = ++offset;
  606. while(*offset != delimiter && ((size_t)(offset - line) < line_len)) offset++;
  607. field_size = (size_t) (offset - field);
  608. }
  609. else goto next_item;
  610. }
  611. if(fields_format[i] == REQ || fields_format[i] == REQ_URL){
  612. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  613. debug_log( "Item %d (type: REQ or REQ_URL):%.*s", i, (int)field_size, field);
  614. #endif
  615. snprintfz( log_line_parsed->req_URL, REQ_URL_MAX_LEN, "%.*s", (int)field_size, field);
  616. // if(unlikely(field[0] == '-' && field_size == 1)){
  617. // log_line_parsed->req_method[0] = '\0';
  618. // log_line_parsed->parsing_errors++;
  619. // }
  620. //if(verify){} ??
  621. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  622. debug_log( "Extracted REQ_URL:%s", log_line_parsed->req_URL ? log_line_parsed->req_URL : "NULL!");
  623. #endif
  624. if(fields_format[i] == REQ) {
  625. while(*(offset + 1) == delimiter) offset++; // Consume extra whitespace characters
  626. field = ++offset;
  627. while(*offset != delimiter && ((size_t)(offset - line) < line_len)) offset++;
  628. field_size = (size_t) (offset - field);
  629. }
  630. else goto next_item;
  631. }
  632. if(fields_format[i] == REQ || fields_format[i] == REQ_PROTO){
  633. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  634. debug_log( "Item %d (type: REQ or REQ_PROTO):%.*s", i, (int)field_size, field);
  635. #endif
  636. if(unlikely(field[0] == '-' && field_size == 1)){
  637. log_line_parsed->req_proto[0] = '\0';
  638. log_line_parsed->parsing_errors++;
  639. goto next_item;
  640. }
  641. if(unlikely( field_size > REQ_PROTO_PREF_SIZE + REQ_PROTO_MAX_LEN - 1)){
  642. field_size = REQ_PROTO_PREF_SIZE + REQ_PROTO_MAX_LEN - 1;
  643. }
  644. size_t req_proto_num_size = field_size - REQ_PROTO_PREF_SIZE;
  645. if(verify){
  646. if(unlikely(field_size < 6 ||
  647. req_proto_num_size == 0 ||
  648. strncmp(field, "HTTP/", REQ_PROTO_PREF_SIZE) ||
  649. ( strncmp(&field[REQ_PROTO_PREF_SIZE], "1", req_proto_num_size) &&
  650. strncmp(&field[REQ_PROTO_PREF_SIZE], "1.0", req_proto_num_size) &&
  651. strncmp(&field[REQ_PROTO_PREF_SIZE], "1.1", req_proto_num_size) &&
  652. strncmp(&field[REQ_PROTO_PREF_SIZE], "2", req_proto_num_size) &&
  653. strncmp(&field[REQ_PROTO_PREF_SIZE], "2.0", req_proto_num_size)))) {
  654. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  655. collector_error("REQ_PROTO is invalid");
  656. #endif
  657. log_line_parsed->req_proto[0] = '\0';
  658. log_line_parsed->parsing_errors++;
  659. }
  660. else snprintfz( log_line_parsed->req_proto, req_proto_num_size + 1,
  661. "%.*s", (int)req_proto_num_size, &field[REQ_PROTO_PREF_SIZE]);
  662. }
  663. else snprintfz( log_line_parsed->req_proto, req_proto_num_size + 1,
  664. "%.*s", (int)req_proto_num_size, &field[REQ_PROTO_PREF_SIZE]);
  665. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  666. debug_log( "Extracted REQ_PROTO:%s", log_line_parsed->req_proto);
  667. #endif
  668. goto next_item;
  669. }
  670. if(fields_format[i] == REQ_SIZE){
  671. /* TODO: Differentiate between '-' or 0 and an invalid request size.
  672. * right now, all these will set req_size == 0 */
  673. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  674. debug_log( "Item %d (type: REQ_SIZE):%.*s", i, (int)field_size, field);
  675. #endif
  676. char req_size_d[REQ_SIZE_MAX_LEN];
  677. snprintfz( req_size_d, REQ_SIZE_MAX_LEN, "%.*s", (int) field_size, field);
  678. if(field[0] == '-' && field_size == 1) {
  679. log_line_parsed->req_size = 0; // Request size can be '-'
  680. }
  681. else if(likely(str2int(&log_line_parsed->req_size, req_size_d, 10) == STR2XX_SUCCESS)){
  682. if(verify){
  683. if(unlikely(log_line_parsed->req_size < 0)){
  684. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  685. collector_error("REQ_SIZE is invalid (<0)");
  686. #endif
  687. log_line_parsed->req_size = 0;
  688. log_line_parsed->parsing_errors++;
  689. }
  690. }
  691. }
  692. else{
  693. collector_error("Error while extracting REQ_SIZE from string");
  694. log_line_parsed->req_size = 0;
  695. log_line_parsed->parsing_errors++;
  696. }
  697. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  698. debug_log( "Extracted REQ_SIZE:%d", log_line_parsed->req_size);
  699. #endif
  700. goto next_item;
  701. }
  702. if(fields_format[i] == REQ_PROC_TIME){
  703. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  704. debug_log( "Item %d (type: REQ_PROC_TIME):%.*s", i, (int)field_size, field);
  705. #endif
  706. if(unlikely(field[0] == '-' && field_size == 1)){
  707. log_line_parsed->req_proc_time = WEB_LOG_INVALID_PORT;
  708. log_line_parsed->parsing_errors++;
  709. goto next_item;
  710. }
  711. float f = 0;
  712. char req_proc_time_d[REQ_PROC_TIME_MAX_LEN];
  713. snprintfz( req_proc_time_d, REQ_PROC_TIME_MAX_LEN, "%.*s", (int) field_size, field);
  714. if(memchr(field, '.', field_size)){ // nginx time is in seconds with a milliseconds resolution.
  715. if(likely(str2float(&f, req_proc_time_d) == STR2XX_SUCCESS)){
  716. log_line_parsed->req_proc_time = (int) (f * 1.0E6);
  717. }
  718. else {
  719. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  720. collector_error("Error while extracting REQ_PROC_TIME from string");
  721. #endif
  722. log_line_parsed->req_proc_time = 0;
  723. log_line_parsed->parsing_errors++;
  724. }
  725. }
  726. else{ // apache time is in microseconds
  727. if(unlikely(str2int(&log_line_parsed->req_proc_time, req_proc_time_d, 10) != STR2XX_SUCCESS)) {
  728. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  729. collector_error("Error while extracting REQ_PROC_TIME from string");
  730. #endif
  731. log_line_parsed->req_proc_time = 0;
  732. log_line_parsed->parsing_errors++;
  733. }
  734. }
  735. if(verify){
  736. if(unlikely(log_line_parsed->req_proc_time < 0)){
  737. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  738. collector_error("REQ_PROC_TIME is invalid (<0)");
  739. #endif
  740. log_line_parsed->req_proc_time = 0;
  741. log_line_parsed->parsing_errors++;
  742. }
  743. }
  744. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  745. debug_log( "Extracted REQ_PROC_TIME:%d", log_line_parsed->req_proc_time);
  746. #endif
  747. goto next_item;
  748. }
  749. if(fields_format[i] == RESP_CODE){
  750. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  751. debug_log( "Item %d (type: RESP_CODE):%.*s\n", i, (int)field_size, field);
  752. #endif
  753. if(unlikely(field[0] == '-' && field_size == 1)){
  754. log_line_parsed->resp_code = 0;
  755. log_line_parsed->parsing_errors++;
  756. goto next_item;
  757. }
  758. char resp_code_d[REQ_RESP_CODE_MAX_LEN];
  759. snprintfz( resp_code_d, REQ_RESP_CODE_MAX_LEN, "%.*s", (int)field_size, field);
  760. if(likely(str2int(&log_line_parsed->resp_code, resp_code_d, 10) == STR2XX_SUCCESS)){
  761. if(verify){
  762. /* rfc7231
  763. * Informational responses (100–199),
  764. * Successful responses (200–299),
  765. * Redirects (300–399),
  766. * Client errors (400–499),
  767. * Server errors (500–599). */
  768. if(unlikely(log_line_parsed->resp_code < 100 || log_line_parsed->resp_code > 599)){
  769. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  770. collector_error("RESP_CODE is invalid (<100 or >599)");
  771. #endif
  772. log_line_parsed->resp_code = 0;
  773. log_line_parsed->parsing_errors++;
  774. }
  775. }
  776. }
  777. else{
  778. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  779. collector_error("Error while extracting RESP_CODE from string");
  780. #endif
  781. log_line_parsed->resp_code = 0;
  782. log_line_parsed->parsing_errors++;
  783. }
  784. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  785. debug_log( "Extracted RESP_CODE:%d", log_line_parsed->resp_code);
  786. #endif
  787. goto next_item;
  788. }
  789. if(fields_format[i] == RESP_SIZE){
  790. /* TODO: Differentiate between '-' or 0 and an invalid response size.
  791. * right now, all these will set resp_size == 0 */
  792. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  793. debug_log( "Item %d (type: RESP_SIZE):%.*s", i, (int)field_size, field);
  794. #endif
  795. char resp_size_d[REQ_RESP_SIZE_MAX_LEN];
  796. snprintfz( resp_size_d, REQ_RESP_SIZE_MAX_LEN, "%.*s", (int)field_size, field);
  797. if(field[0] == '-' && field_size == 1) {
  798. log_line_parsed->resp_size = 0; // Response size can be '-'
  799. }
  800. else if(likely(str2int(&log_line_parsed->resp_size, resp_size_d, 10) == STR2XX_SUCCESS)){
  801. if(verify){
  802. if(unlikely(log_line_parsed->resp_size < 0)){
  803. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  804. collector_error("RESP_SIZE is invalid (<0)");
  805. #endif
  806. log_line_parsed->resp_size = 0;
  807. log_line_parsed->parsing_errors++;
  808. }
  809. }
  810. }
  811. else {
  812. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  813. collector_error("Error while extracting RESP_SIZE from string");
  814. #endif
  815. log_line_parsed->resp_size = 0;
  816. log_line_parsed->parsing_errors++;
  817. }
  818. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  819. debug_log( "Extracted RESP_SIZE:%d", log_line_parsed->resp_size);
  820. #endif
  821. goto next_item;
  822. }
  823. if(fields_format[i] == UPS_RESP_TIME){
  824. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  825. debug_log( "Item %d (type: UPS_RESP_TIME):%.*s", i, (int)field_size, field);
  826. #endif
  827. if(field[0] == '-' && field_size == 1) {
  828. log_line_parsed->ups_resp_time = 0;
  829. log_line_parsed->parsing_errors++;
  830. goto next_item;
  831. }
  832. /* Times of several responses are separated by commas and colons. Following the
  833. * Go parser implementation, where only the first one is kept, the others are
  834. * discarded. Also, there must be no space in between them. Needs testing... */
  835. char *pch = memchr(field, ',', field_size);
  836. if(pch) field_size = pch - field;
  837. float f = 0;
  838. char ups_resp_time_d[UPS_RESP_TIME_MAX_LEN];
  839. snprintfz( ups_resp_time_d, UPS_RESP_TIME_MAX_LEN, "%.*s", (int)field_size, field);
  840. if(memchr(field, '.', field_size)){ // nginx time is in seconds with a milliseconds resolution.
  841. if(likely(str2float(&f, ups_resp_time_d) == STR2XX_SUCCESS)){
  842. log_line_parsed->ups_resp_time = (int) (f * 1.0E6);
  843. }
  844. else {
  845. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  846. collector_error("Error while extracting UPS_RESP_TIME from string");
  847. #endif
  848. log_line_parsed->ups_resp_time = 0;
  849. log_line_parsed->parsing_errors++;
  850. }
  851. }
  852. else{ // unlike in the REQ_PROC_TIME case, apache doesn't have an equivalent here
  853. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  854. collector_error("Error while extracting UPS_RESP_TIME from string");
  855. #endif
  856. log_line_parsed->ups_resp_time = 0;
  857. log_line_parsed->parsing_errors++;
  858. }
  859. if(verify){
  860. if(unlikely(log_line_parsed->ups_resp_time < 0)){
  861. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  862. collector_error("UPS_RESP_TIME is invalid (<0)");
  863. #endif
  864. log_line_parsed->ups_resp_time = 0;
  865. log_line_parsed->parsing_errors++;
  866. }
  867. }
  868. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  869. debug_log( "Extracted UPS_RESP_TIME:%d", log_line_parsed->ups_resp_time);
  870. #endif
  871. goto next_item;
  872. }
  873. if(fields_format[i] == SSL_PROTO){
  874. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  875. debug_log( "Item %d (type: SSL_PROTO):%.*s", i, (int)field_size, field);
  876. #endif
  877. if(field[0] == '-' && field_size == 1) {
  878. log_line_parsed->ssl_proto[0] = '\0';
  879. log_line_parsed->parsing_errors++;
  880. goto next_item;
  881. }
  882. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  883. debug_log( "SSL_PROTO field size:%zu", field_size);
  884. #endif
  885. snprintfz( log_line_parsed->ssl_proto, SSL_PROTO_MAX_LEN, "%.*s", (int)field_size, field);
  886. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  887. debug_log( "log_line_parsed->ssl_proto:%s", log_line_parsed->ssl_proto);
  888. #endif
  889. if(verify){
  890. if(unlikely(strcmp(log_line_parsed->ssl_proto, "TLSv1") &&
  891. strcmp(log_line_parsed->ssl_proto, "TLSv1.1") &&
  892. strcmp(log_line_parsed->ssl_proto, "TLSv1.2") &&
  893. strcmp(log_line_parsed->ssl_proto, "TLSv1.3") &&
  894. strcmp(log_line_parsed->ssl_proto, "SSLv2") &&
  895. strcmp(log_line_parsed->ssl_proto, "SSLv3"))) {
  896. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  897. collector_error("SSL_PROTO is invalid");
  898. #endif
  899. log_line_parsed->ssl_proto[0] = '\0';
  900. log_line_parsed->parsing_errors++;
  901. }
  902. }
  903. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  904. debug_log( "Extracted SSL_PROTO:%s", log_line_parsed->ssl_proto);
  905. #endif
  906. goto next_item;
  907. }
  908. if(fields_format[i] == SSL_CIPHER_SUITE){
  909. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  910. debug_log( "Item %d (type: SSL_CIPHER_SUITE):%.*s", i, (int)field_size, field);
  911. #endif
  912. if(field[0] == '-' && field_size == 1) {
  913. log_line_parsed->ssl_cipher[0] = '\0';
  914. log_line_parsed->parsing_errors++;
  915. }
  916. snprintfz( log_line_parsed->ssl_cipher, SSL_CIPHER_SUITE_MAX_LEN, "%.*s", (int)field_size, field);
  917. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  918. debug_log( "before: SSL_CIPHER_SUITE:%s", log_line_parsed->ssl_cipher);
  919. #endif
  920. if(verify){
  921. int regex_rc = regexec(&cipher_suite_regex, log_line_parsed->ssl_cipher, 0, NULL, 0);
  922. if (likely(regex_rc == 0)){/* do nothing */}
  923. else if (unlikely(regex_rc == REG_NOMATCH)) {
  924. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  925. collector_error("SSL_CIPHER_SUITE is invalid");
  926. #endif
  927. log_line_parsed->ssl_cipher[0] = '\0';
  928. log_line_parsed->parsing_errors++;
  929. }
  930. else {
  931. size_t err_msg_size = regerror(regex_rc, &cipher_suite_regex, NULL, 0);
  932. char *err_msg = mallocz(err_msg_size);
  933. regerror(regex_rc, &cipher_suite_regex, err_msg, err_msg_size);
  934. collector_error("cipher_suite_regex error:%s", err_msg);
  935. freez(err_msg);
  936. m_assert(0, "cipher_suite_regex has failed");
  937. }
  938. }
  939. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  940. debug_log( "Extracted SSL_CIPHER_SUITE:%s", log_line_parsed->ssl_cipher);
  941. #endif
  942. goto next_item;
  943. }
  944. if(fields_format[i] == TIME){
  945. if(wblp_config->skip_timestamp_parsing){
  946. while(*offset != ']') offset++;
  947. i++;
  948. offset++;
  949. goto next_item;
  950. }
  951. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  952. debug_log( "Item %d (type: TIME - 1st of 2 fields):%.*s", i, (int)field_size, field);
  953. #endif
  954. // TODO: What if TIME is invalid?
  955. // if(field[0] == '-' && field_size == 1) {
  956. // log_line_parsed->timestamp = 0;
  957. // log_line_parsed->parsing_errors++;
  958. // ++i;
  959. // goto next_item;
  960. // }
  961. char *datetime = field;
  962. if(memchr(datetime, '[', field_size)) {
  963. datetime++;
  964. field_size--;
  965. }
  966. struct tm ltm = {0};
  967. char *tz_str = strptime(datetime, "%d/%b/%Y:%H:%M:%S", &ltm);
  968. if(unlikely(tz_str == NULL)){
  969. collector_error("TIME datetime parsing failed");
  970. log_line_parsed->timestamp = 0;
  971. log_line_parsed->parsing_errors++;
  972. goto next_item;
  973. }
  974. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  975. debug_log( "strptime() result: year:%d mon:%d day:%d hour:%d min:%d sec:%d",
  976. ltm.tm_year, ltm.tm_mon, ltm.tm_mday,
  977. ltm.tm_hour, ltm.tm_min, ltm.tm_sec);
  978. #endif
  979. /* Deal with 2nd part of datetime i.e. timezone */
  980. m_assert(*tz_str == ' ', "Invalid TIME timezone");
  981. ++tz_str;
  982. m_assert(*tz_str == '+' || *tz_str == '-', "Invalid TIME timezone");
  983. char tz_sign = *tz_str;
  984. char *tz_str_end = ++tz_str;
  985. while(*tz_str_end != ']') tz_str_end++;
  986. m_assert(tz_str_end - tz_str == 4, "Invalid TIME timezone string length");
  987. char tz_num[4];
  988. memcpy(tz_num, tz_str, tz_str_end - tz_str);
  989. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  990. debug_log( "TIME 2nd part: %.*s", (int)(tz_str_end - tz_str), tz_str);
  991. #endif
  992. long int tz = strtol(tz_str, NULL, 10);
  993. long int tz_h = tz / 100;
  994. long int tz_m = tz % 100;
  995. int64_t tz_adj = (int64_t) tz_h * 3600 + (int64_t) tz_m * 60;
  996. if(tz_sign == '+') tz_adj *= -1; // if timezone is positive, we need to subtract it to get GMT
  997. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  998. debug_log( "Timezone: int:%ld, hrs:%ld, mins:%ld", tz, tz_h, tz_m);
  999. #endif
  1000. if(-1 == (log_line_parsed->timestamp = timegm(&ltm) + tz_adj)){
  1001. collector_error("TIME datetime parsing failed");
  1002. log_line_parsed->timestamp = 0;
  1003. log_line_parsed->parsing_errors++;
  1004. }
  1005. #if ENABLE_PARSE_WEB_LOG_LINE_DEBUG
  1006. char tb[80];
  1007. strftime(tb, sizeof(tb), "%c", &ltm );
  1008. debug_log( "Extracted TIME:%ld", log_line_parsed->timestamp);
  1009. debug_log( "Extracted TIME string:%s", tb);
  1010. #endif
  1011. offset = tz_str_end + 1; // WARNING! this modifies the offset but it is required in the TIME case.
  1012. ++i; // TIME takes up 2 fields_format[] spaces, so skip the next one
  1013. goto next_item;
  1014. }
  1015. next_item:
  1016. /* If offset is located beyond the end of the line, terminate parsing */
  1017. if(unlikely((size_t) (offset - line) >= line_len)) break;
  1018. field = ++offset;
  1019. }
  1020. }
  1021. /**
  1022. * @brief Extract web log metrics from a group of web log fields.
  1023. * @param[in] parser_config Configuration specifying how and what web log
  1024. * metrics to extract.
  1025. * @param[in] line_parsed Web logs fields extracted from a web log line.
  1026. * @param[out] metrics Web logs metrics exctracted from the \p line_parsed
  1027. * web log fields, using the \p parser_config configuration.
  1028. */
  1029. void extract_web_log_metrics(Log_parser_config_t *parser_config,
  1030. Log_line_parsed_t *line_parsed,
  1031. Web_log_metrics_t *metrics){
  1032. /* Extract number of parsed lines */
  1033. /* NOTE: Commented out as it is done in flb_collect_logs_cb() now. */
  1034. // metrics->num_lines++;
  1035. /* Extract vhost */
  1036. // TODO: Reduce number of reallocs
  1037. if((parser_config->chart_config & CHART_VHOST) && *line_parsed->vhost){
  1038. int i;
  1039. for(i = 0; i < metrics->vhost_arr.size; i++){
  1040. if(!strcmp(metrics->vhost_arr.vhosts[i].name, line_parsed->vhost)){
  1041. metrics->vhost_arr.vhosts[i].count++;
  1042. break;
  1043. }
  1044. }
  1045. if(metrics->vhost_arr.size == i){ // Vhost not found in array - need to append
  1046. metrics->vhost_arr.size++;
  1047. if(metrics->vhost_arr.size >= metrics->vhost_arr.size_max){
  1048. metrics->vhost_arr.size_max = metrics->vhost_arr.size * VHOST_BUFFS_SCALE_FACTOR + 1;
  1049. metrics->vhost_arr.vhosts = reallocz( metrics->vhost_arr.vhosts,
  1050. metrics->vhost_arr.size_max * sizeof(struct log_parser_metrics_vhost));
  1051. }
  1052. snprintf(metrics->vhost_arr.vhosts[metrics->vhost_arr.size - 1].name, VHOST_MAX_LEN, "%s", line_parsed->vhost);
  1053. metrics->vhost_arr.vhosts[metrics->vhost_arr.size - 1].count = 1;
  1054. }
  1055. }
  1056. /* Extract port */
  1057. // TODO: Reduce number of reallocs
  1058. if((parser_config->chart_config & CHART_PORT) && line_parsed->port){
  1059. int i;
  1060. for(i = 0; i < metrics->port_arr.size; i++){
  1061. if(metrics->port_arr.ports[i].port == line_parsed->port){
  1062. metrics->port_arr.ports[i].count++;
  1063. break;
  1064. }
  1065. }
  1066. if(metrics->port_arr.size == i){ // Port not found in array - need to append
  1067. metrics->port_arr.size++;
  1068. if(metrics->port_arr.size >= metrics->port_arr.size_max){
  1069. metrics->port_arr.size_max = metrics->port_arr.size * PORT_BUFFS_SCALE_FACTOR + 1;
  1070. metrics->port_arr.ports = reallocz( metrics->port_arr.ports,
  1071. metrics->port_arr.size_max * sizeof(struct log_parser_metrics_port));
  1072. }
  1073. if(line_parsed->port == WEB_LOG_INVALID_PORT)
  1074. snprintfz(metrics->port_arr.ports[metrics->port_arr.size - 1].name, PORT_MAX_LEN, WEB_LOG_INVALID_PORT_STR);
  1075. else
  1076. snprintfz(metrics->port_arr.ports[metrics->port_arr.size - 1].name, PORT_MAX_LEN, "%d", line_parsed->port);
  1077. metrics->port_arr.ports[metrics->port_arr.size - 1].port = line_parsed->port;
  1078. metrics->port_arr.ports[metrics->port_arr.size - 1].count = 1;
  1079. }
  1080. }
  1081. /* Extract client metrics */
  1082. if(( parser_config->chart_config & ( CHART_IP_VERSION | CHART_REQ_CLIENT_CURRENT | CHART_REQ_CLIENT_ALL_TIME)) && *line_parsed->req_client) {
  1083. /* Invalid IP version */
  1084. if(unlikely(!strcmp(line_parsed->req_client, WEB_LOG_INVALID_CLIENT_IP_STR))){
  1085. if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.invalid++;
  1086. }
  1087. else if(strchr(line_parsed->req_client, ':')){
  1088. /* IPv6 version */
  1089. if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.v6++;
  1090. /* Unique Client IPv6 Address current poll */
  1091. if(parser_config->chart_config & CHART_REQ_CLIENT_CURRENT){
  1092. int i;
  1093. for(i = 0; i < metrics->req_clients_current_arr.ipv6_size; i++){
  1094. if(!strcmp(metrics->req_clients_current_arr.ipv6_req_clients[i], line_parsed->req_client)) break;
  1095. }
  1096. if(metrics->req_clients_current_arr.ipv6_size == i){ // Req client not found in array - need to append
  1097. metrics->req_clients_current_arr.ipv6_size++;
  1098. metrics->req_clients_current_arr.ipv6_req_clients = reallocz(metrics->req_clients_current_arr.ipv6_req_clients,
  1099. metrics->req_clients_current_arr.ipv6_size * sizeof(*metrics->req_clients_current_arr.ipv6_req_clients));
  1100. snprintf(metrics->req_clients_current_arr.ipv6_req_clients[metrics->req_clients_current_arr.ipv6_size - 1],
  1101. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1102. }
  1103. }
  1104. /* Unique Client IPv6 Address all-time */
  1105. if(parser_config->chart_config & CHART_REQ_CLIENT_ALL_TIME){
  1106. int i;
  1107. for(i = 0; i < metrics->req_clients_alltime_arr.ipv6_size; i++){
  1108. if(!strcmp(metrics->req_clients_alltime_arr.ipv6_req_clients[i], line_parsed->req_client)) break;
  1109. }
  1110. if(metrics->req_clients_alltime_arr.ipv6_size == i){ // Req client not found in array - need to append
  1111. metrics->req_clients_alltime_arr.ipv6_size++;
  1112. metrics->req_clients_alltime_arr.ipv6_req_clients = reallocz(metrics->req_clients_alltime_arr.ipv6_req_clients,
  1113. metrics->req_clients_alltime_arr.ipv6_size * sizeof(*metrics->req_clients_alltime_arr.ipv6_req_clients));
  1114. snprintf(metrics->req_clients_alltime_arr.ipv6_req_clients[metrics->req_clients_alltime_arr.ipv6_size - 1],
  1115. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1116. }
  1117. }
  1118. }
  1119. else{
  1120. /* IPv4 version */
  1121. if(parser_config->chart_config & CHART_IP_VERSION) metrics->ip_ver.v4++;
  1122. /* Unique Client IPv4 Address current poll */
  1123. if(parser_config->chart_config & CHART_REQ_CLIENT_CURRENT){
  1124. int i;
  1125. for(i = 0; i < metrics->req_clients_current_arr.ipv4_size; i++){
  1126. if(!strcmp(metrics->req_clients_current_arr.ipv4_req_clients[i], line_parsed->req_client)) break;
  1127. }
  1128. if(metrics->req_clients_current_arr.ipv4_size == i){ // Req client not found in array - need to append
  1129. metrics->req_clients_current_arr.ipv4_size++;
  1130. metrics->req_clients_current_arr.ipv4_req_clients = reallocz(metrics->req_clients_current_arr.ipv4_req_clients,
  1131. metrics->req_clients_current_arr.ipv4_size * sizeof(*metrics->req_clients_current_arr.ipv4_req_clients));
  1132. snprintf(metrics->req_clients_current_arr.ipv4_req_clients[metrics->req_clients_current_arr.ipv4_size - 1],
  1133. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1134. }
  1135. }
  1136. /* Unique Client IPv4 Address all-time */
  1137. if(parser_config->chart_config & CHART_REQ_CLIENT_ALL_TIME){
  1138. int i;
  1139. for(i = 0; i < metrics->req_clients_alltime_arr.ipv4_size; i++){
  1140. if(!strcmp(metrics->req_clients_alltime_arr.ipv4_req_clients[i], line_parsed->req_client)) break;
  1141. }
  1142. if(metrics->req_clients_alltime_arr.ipv4_size == i){ // Req client not found in array - need to append
  1143. metrics->req_clients_alltime_arr.ipv4_size++;
  1144. metrics->req_clients_alltime_arr.ipv4_req_clients = reallocz(metrics->req_clients_alltime_arr.ipv4_req_clients,
  1145. metrics->req_clients_alltime_arr.ipv4_size * sizeof(*metrics->req_clients_alltime_arr.ipv4_req_clients));
  1146. snprintf(metrics->req_clients_alltime_arr.ipv4_req_clients[metrics->req_clients_alltime_arr.ipv4_size - 1],
  1147. REQ_CLIENT_MAX_LEN, "%s", line_parsed->req_client);
  1148. }
  1149. }
  1150. }
  1151. }
  1152. /* Extract request method */
  1153. if(parser_config->chart_config & CHART_REQ_METHODS){
  1154. for(int i = 0; i < REQ_METHOD_ARR_SIZE; i++){
  1155. if(!strcmp(line_parsed->req_method, req_method_str[i])){
  1156. metrics->req_method[i]++;
  1157. break;
  1158. }
  1159. }
  1160. }
  1161. /* Extract request protocol */
  1162. if(parser_config->chart_config & CHART_REQ_PROTO){
  1163. if(!strcmp(line_parsed->req_proto, "1") || !strcmp(line_parsed->req_proto, "1.0")) metrics->req_proto.http_1++;
  1164. else if(!strcmp(line_parsed->req_proto, "1.1")) metrics->req_proto.http_1_1++;
  1165. else if(!strcmp(line_parsed->req_proto, "2") || !strcmp(line_parsed->req_proto, "2.0")) metrics->req_proto.http_2++;
  1166. else metrics->req_proto.other++;
  1167. }
  1168. /* Extract bytes received and sent */
  1169. if(parser_config->chart_config & CHART_BANDWIDTH){
  1170. metrics->bandwidth.req_size += line_parsed->req_size;
  1171. metrics->bandwidth.resp_size += line_parsed->resp_size;
  1172. }
  1173. /* Extract request processing time */
  1174. if((parser_config->chart_config & CHART_REQ_PROC_TIME) && line_parsed->req_proc_time){
  1175. if(line_parsed->req_proc_time < metrics->req_proc_time.min || metrics->req_proc_time.min == 0){
  1176. metrics->req_proc_time.min = line_parsed->req_proc_time;
  1177. }
  1178. if(line_parsed->req_proc_time > metrics->req_proc_time.max || metrics->req_proc_time.max == 0){
  1179. metrics->req_proc_time.max = line_parsed->req_proc_time;
  1180. }
  1181. metrics->req_proc_time.sum += line_parsed->req_proc_time;
  1182. metrics->req_proc_time.count++;
  1183. }
  1184. /* Extract response code family, response code & response code type */
  1185. if(parser_config->chart_config & (CHART_RESP_CODE_FAMILY | CHART_RESP_CODE | CHART_RESP_CODE_TYPE)){
  1186. switch(line_parsed->resp_code / 100){
  1187. /* Note: 304 and 401 should be treated as resp_success */
  1188. case 1:
  1189. metrics->resp_code_family.resp_1xx++;
  1190. metrics->resp_code[line_parsed->resp_code - 100]++;
  1191. metrics->resp_code_type.resp_success++;
  1192. break;
  1193. case 2:
  1194. metrics->resp_code_family.resp_2xx++;
  1195. metrics->resp_code[line_parsed->resp_code - 100]++;
  1196. metrics->resp_code_type.resp_success++;
  1197. break;
  1198. case 3:
  1199. metrics->resp_code_family.resp_3xx++;
  1200. metrics->resp_code[line_parsed->resp_code - 100]++;
  1201. if(line_parsed->resp_code == 304) metrics->resp_code_type.resp_success++;
  1202. else metrics->resp_code_type.resp_redirect++;
  1203. break;
  1204. case 4:
  1205. metrics->resp_code_family.resp_4xx++;
  1206. metrics->resp_code[line_parsed->resp_code - 100]++;
  1207. if(line_parsed->resp_code == 401) metrics->resp_code_type.resp_success++;
  1208. else metrics->resp_code_type.resp_bad++;
  1209. break;
  1210. case 5:
  1211. metrics->resp_code_family.resp_5xx++;
  1212. metrics->resp_code[line_parsed->resp_code - 100]++;
  1213. metrics->resp_code_type.resp_error++;
  1214. break;
  1215. default:
  1216. metrics->resp_code_family.other++;
  1217. metrics->resp_code[RESP_CODE_ARR_SIZE - 1]++;
  1218. metrics->resp_code_type.other++;
  1219. break;
  1220. }
  1221. }
  1222. /* Extract SSL protocol */
  1223. if(parser_config->chart_config & CHART_SSL_PROTO){
  1224. if(!strcmp(line_parsed->ssl_proto, "TLSv1")) metrics->ssl_proto.tlsv1++;
  1225. else if(!strcmp(line_parsed->ssl_proto, "TLSv1.1")) metrics->ssl_proto.tlsv1_1++;
  1226. else if(!strcmp(line_parsed->ssl_proto, "TLSv1.2")) metrics->ssl_proto.tlsv1_2++;
  1227. else if(!strcmp(line_parsed->ssl_proto, "TLSv1.3")) metrics->ssl_proto.tlsv1_3++;
  1228. else if(!strcmp(line_parsed->ssl_proto, "SSLv2")) metrics->ssl_proto.sslv2++;
  1229. else if(!strcmp(line_parsed->ssl_proto, "SSLv3")) metrics->ssl_proto.sslv3++;
  1230. else metrics->ssl_proto.other++;
  1231. }
  1232. /* Extract SSL cipher suite */
  1233. // TODO: Reduce number of reallocs
  1234. if((parser_config->chart_config & CHART_SSL_CIPHER) && *line_parsed->ssl_cipher){
  1235. int i;
  1236. for(i = 0; i < metrics->ssl_cipher_arr.size; i++){
  1237. if(!strcmp(metrics->ssl_cipher_arr.ssl_ciphers[i].name, line_parsed->ssl_cipher)){
  1238. metrics->ssl_cipher_arr.ssl_ciphers[i].count++;
  1239. break;
  1240. }
  1241. }
  1242. if(metrics->ssl_cipher_arr.size == i){ // SSL cipher suite not found in array - need to append
  1243. metrics->ssl_cipher_arr.size++;
  1244. metrics->ssl_cipher_arr.ssl_ciphers = reallocz(metrics->ssl_cipher_arr.ssl_ciphers,
  1245. metrics->ssl_cipher_arr.size * sizeof(struct log_parser_metrics_ssl_cipher));
  1246. snprintf( metrics->ssl_cipher_arr.ssl_ciphers[metrics->ssl_cipher_arr.size - 1].name,
  1247. SSL_CIPHER_SUITE_MAX_LEN, "%s", line_parsed->ssl_cipher);
  1248. metrics->ssl_cipher_arr.ssl_ciphers[metrics->ssl_cipher_arr.size - 1].count = 1;
  1249. }
  1250. }
  1251. metrics->timestamp = line_parsed->timestamp;
  1252. }
  1253. /**
  1254. * @brief Try to automatically detect the configuration for a web log parser.
  1255. * @details It tries to automatically detect the configuration to be used for
  1256. * a web log parser, by parsing a single web log line record and trying to pick
  1257. * a matching configuration (from a static list of predefined ones.)
  1258. * @param[in] line Null-terminated web log line to use in guessing the configuration.
  1259. * @param[in] delimiter Delimiter used to break down \p line in separate fields.
  1260. * @returns Pointer to the web log parser configuration if automatic detection
  1261. * was sucessful, otherwise NULL.
  1262. */
  1263. Web_log_parser_config_t *auto_detect_web_log_parser_config(char *line, const char delimiter){
  1264. for(int i = 0; csv_auto_format_guess_matrix[i] != NULL; i++){
  1265. Web_log_parser_config_t *wblp_config = read_web_log_parser_config(csv_auto_format_guess_matrix[i], delimiter);
  1266. if(count_fields(line, delimiter) == wblp_config->num_fields){
  1267. wblp_config->verify_parsed_logs = 1; // Verification must be turned on to be able to pick up parsing_errors
  1268. Log_line_parsed_t line_parsed = (Log_line_parsed_t) {0};
  1269. parse_web_log_line(wblp_config, line, strlen(line), &line_parsed);
  1270. if(line_parsed.parsing_errors == 0){
  1271. return wblp_config;
  1272. }
  1273. }
  1274. freez(wblp_config->fields);
  1275. freez(wblp_config);
  1276. }
  1277. return NULL;
  1278. }