benchmark-procfile-parser.c 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. /* SPDX-License-Identifier: GPL-3.0-or-later */
  2. #include "config.h"
  3. #include "libnetdata/libnetdata.h"
  4. void netdata_cleanup_and_exit(int ret) {
  5. exit(ret);
  6. }
// prefix used in this file's log messages
#define PF_PREFIX "PROCFILE"
// number of word slots added whenever the words array is full (see pfwords_add)
#define PFWORDS_INCREASE_STEP 200
// number of line slots added whenever the lines array is full (see pflines_add)
#define PFLINES_INCREASE_STEP 10
// number of bytes added to the data buffer whenever it is full (see procfile_readall1)
#define PROCFILE_INCREMENT_BUFFER 512
// High-water marks defined outside this file. procfile_readall1() raises them
// to the largest data length / line count / word count it has seen, but only
// when procfile_adaptive_initial_allocation is set.
extern size_t procfile_max_lines;
extern size_t procfile_max_words;
extern size_t procfile_max_allocation;
  14. static inline void pflines_reset(pflines *fl) {
  15. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": resetting lines");
  16. fl->len = 0;
  17. }
  18. static inline void pflines_free(pflines *fl) {
  19. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": freeing lines");
  20. freez(fl);
  21. }
  22. static inline void pfwords_reset(pfwords *fw) {
  23. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": resetting words");
  24. fw->len = 0;
  25. }
  26. static inline void pfwords_add(procfile *ff, char *str) {
  27. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": adding word No %d: '%s'", fw->len, str);
  28. pfwords *fw = ff->words;
  29. if(unlikely(fw->len == fw->size)) {
  30. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": expanding words");
  31. ff->words = fw = reallocz(fw, sizeof(pfwords) + (fw->size + PFWORDS_INCREASE_STEP) * sizeof(char *));
  32. fw->size += PFWORDS_INCREASE_STEP;
  33. }
  34. fw->words[fw->len++] = str;
  35. }
  36. NEVERNULL
  37. static inline size_t *pflines_add(procfile *ff) {
  38. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": adding line %d at word %d", fl->len, first_word);
  39. pflines *fl = ff->lines;
  40. if(unlikely(fl->len == fl->size)) {
  41. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": expanding lines");
  42. ff->lines = fl = reallocz(fl, sizeof(pflines) + (fl->size + PFLINES_INCREASE_STEP) * sizeof(ffline));
  43. fl->size += PFLINES_INCREASE_STEP;
  44. }
  45. ffline *ffl = &fl->lines[fl->len++];
  46. ffl->words = 0;
  47. ffl->first = ff->words->len;
  48. return &ffl->words;
  49. }
/*
 * Split the bytes in ff->data[0 .. ff->len) into words and lines, in place.
 *
 * Every byte is classified through the ff->separators lookup table. When a
 * word ends, its terminating byte is overwritten with '\0' and a pointer to
 * the word's first byte is appended with pfwords_add(). Lines are opened
 * with pflines_add(), and the word counter it returns is incremented once
 * per word.
 *
 * The function does not reset ff->lines / ff->words itself; in this file
 * procfile_readall1() does that before calling it.
 */
NOINLINE
static void procfile_parser(procfile *ff) {
    char *s = ff->data // our current position
        , *e = &ff->data[ff->len] // one past the last byte read; the scan stops here
        , *t = ff->data; // the first character of a word (or quoted / parenthesized string)

    // the look up array to find our type of character
    PF_CHAR_TYPE *separators = ff->separators;

    char quote = 0; // the quote character - only when in quoted string
    size_t opened = 0; // counts the number of open parenthesis

    // the first line always exists, even when there is no data at all
    size_t *line_words = pflines_add(ff);

    while(s < e) {
        // index with unsigned char so bytes >= 0x80 do not produce a negative index
        PF_CHAR_TYPE ct = separators[(unsigned char)(*s)];

        // dispatch on the character class
        // background reading on switch() performance: http://lazarenko.me/switch/
        switch(ct) {
            case PF_CHAR_IS_SEPARATOR:
                // a separator only splits words outside quotes and parentheses;
                // inside them it is treated like any other word character
                if(!quote && !opened) {
                    if (s != t) {
                        // separator, but we have word before it
                        *s = '\0';
                        pfwords_add(ff, t);
                        (*line_words)++;
                    }
                    // consecutive separators produce no empty words: just move the word start
                    t = s + 1;
                }
                // fallthrough

            case PF_CHAR_IS_WORD:
                s++;
                break;

            case PF_CHAR_IS_NEWLINE:
                // end of line
                // the pending word is added unconditionally, so it is an empty
                // string when the newline directly follows a separator or
                // another newline
                // note: quote and opened are not reset here
                *s = '\0';
                pfwords_add(ff, t);
                (*line_words)++;
                t = ++s;

                // open the next line; it starts at the current end of the words array
                line_words = pflines_add(ff);
                break;

            case PF_CHAR_IS_QUOTE:
                if(unlikely(!quote && s == t)) {
                    // quote opened at the beginning
                    // remember which quote character must close it, and skip it
                    quote = *s;
                    t = ++s;
                }
                else if(unlikely(quote && quote == *s)) {
                    // quote closed
                    // the quoted text becomes one word, even if it is empty
                    quote = 0;
                    *s = '\0';
                    pfwords_add(ff, t);
                    (*line_words)++;
                    t = ++s;
                }
                else
                    // a quote in the middle of a word, or a different quote
                    // character inside a quoted string, is ordinary text
                    s++;
                break;

            case PF_CHAR_IS_OPEN:
                if(s == t) {
                    // opening character at the start of a word: count it and
                    // start the word after it
                    opened++;
                    t = ++s;
                }
                else if(opened) {
                    // nested opening character: count it, keep it in the word
                    opened++;
                    s++;
                }
                else
                    // opening character in the middle of a plain word: ordinary text
                    s++;
                break;

            case PF_CHAR_IS_CLOSE:
                if(opened) {
                    opened--;

                    if(!opened) {
                        // the outermost group closed: everything since t is one word
                        *s = '\0';
                        pfwords_add(ff, t);
                        (*line_words)++;
                        t = ++s;
                    }
                    else
                        // an inner group closed: keep the character in the word
                        s++;
                }
                else
                    // closing character with nothing open: ordinary text
                    s++;
                break;

            default:
                // the lookup table returned a class this switch does not know
                fatal("Internal Error: procfile_readall() does not handle all the cases.");
        }
    }

    // data ended in the middle of a word (no terminator after it)
    if(likely(s > t && t < e)) {
        // the last word

        if(unlikely(ff->len >= ff->size)) {
            // we are going to lose the last byte
            // the terminator is written at index size - 1 instead of at s
            // NOTE(review): assumes ff->data holds ff->size bytes - confirm
            s = &ff->data[ff->size - 1];
        }

        *s = '\0';
        pfwords_add(ff, t);
        (*line_words)++;
        // t = ++s;
    }
}
  149. procfile *procfile_readall1(procfile *ff) {
  150. // netdata_log_debug(D_PROCFILE, PF_PREFIX ": Reading file '%s'.", ff->filename);
  151. ff->len = 0; // zero the used size
  152. ssize_t r = 1; // read at least once
  153. while(r > 0) {
  154. ssize_t s = ff->len;
  155. ssize_t x = ff->size - s;
  156. if(unlikely(!x)) {
  157. netdata_log_debug(D_PROCFILE, PF_PREFIX ": Expanding data buffer for file '%s'.", procfile_filename(ff));
  158. ff = reallocz(ff, sizeof(procfile) + ff->size + PROCFILE_INCREMENT_BUFFER);
  159. ff->size += PROCFILE_INCREMENT_BUFFER;
  160. }
  161. netdata_log_debug(D_PROCFILE, "Reading file '%s', from position %zd with length %zd", procfile_filename(ff), s, (ssize_t)(ff->size - s));
  162. r = read(ff->fd, &ff->data[s], ff->size - s);
  163. if(unlikely(r == -1)) {
  164. if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) netdata_log_error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd);
  165. procfile_close(ff);
  166. return NULL;
  167. }
  168. ff->len += r;
  169. }
  170. // netdata_log_debug(D_PROCFILE, "Rewinding file '%s'", ff->filename);
  171. if(unlikely(lseek(ff->fd, 0, SEEK_SET) == -1)) {
  172. if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) netdata_log_error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff));
  173. procfile_close(ff);
  174. return NULL;
  175. }
  176. pflines_reset(ff->lines);
  177. pfwords_reset(ff->words);
  178. procfile_parser(ff);
  179. if(unlikely(procfile_adaptive_initial_allocation)) {
  180. if(unlikely(ff->len > procfile_max_allocation)) procfile_max_allocation = ff->len;
  181. if(unlikely(ff->lines->len > procfile_max_lines)) procfile_max_lines = ff->lines->len;
  182. if(unlikely(ff->words->len > procfile_max_words)) procfile_max_words = ff->words->len;
  183. }
  184. // netdata_log_debug(D_PROCFILE, "File '%s' updated.", ff->filename);
  185. return ff;
  186. }
  187. // ==============
  188. // --- Poor man cycle counting.
  189. static unsigned long tsc;
  190. void begin_tsc(void)
  191. {
  192. unsigned long a, d;
  193. asm volatile ("cpuid\nrdtsc" : "=a" (a), "=d" (d) : "0" (0) : "ebx", "ecx");
  194. tsc = ((unsigned long)d << 32) | (unsigned long)a;
  195. }
  196. unsigned long end_tsc(void)
  197. {
  198. unsigned long a, d;
  199. asm volatile ("rdtscp" : "=a" (a), "=d" (d) : : "ecx");
  200. return (((unsigned long)d << 32) | (unsigned long)a) - tsc;
  201. }
  202. // ==============
  203. unsigned long test_netdata_internal(void) {
  204. static procfile *ff = NULL;
  205. ff = procfile_reopen(ff, "/proc/self/status", " \t:,-()/", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
  206. if(!ff) {
  207. fprintf(stderr, "Failed to open filename\n");
  208. exit(1);
  209. }
  210. begin_tsc();
  211. ff = procfile_readall(ff);
  212. unsigned long c = end_tsc();
  213. if(!ff) {
  214. fprintf(stderr, "Failed to read filename\n");
  215. exit(1);
  216. }
  217. return c;
  218. }
  219. unsigned long test_method1(void) {
  220. static procfile *ff = NULL;
  221. ff = procfile_reopen(ff, "/proc/self/status", " \t:,-()/", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
  222. if(!ff) {
  223. fprintf(stderr, "Failed to open filename\n");
  224. exit(1);
  225. }
  226. begin_tsc();
  227. ff = procfile_readall1(ff);
  228. unsigned long c = end_tsc();
  229. if(!ff) {
  230. fprintf(stderr, "Failed to read filename\n");
  231. exit(1);
  232. }
  233. return c;
  234. }
  235. //--- Test
  236. int main(int argc, char **argv)
  237. {
  238. (void)argc; (void)argv;
  239. int i, max = 1000000;
  240. unsigned long c1 = 0;
  241. test_netdata_internal();
  242. for(i = 0; i < max ; i++)
  243. c1 += test_netdata_internal();
  244. unsigned long c2 = 0;
  245. test_method1();
  246. for(i = 0; i < max ; i++)
  247. c2 += test_method1();
  248. printf("netdata internal: completed in %lu cycles, %lu cycles per read, %0.2f %%.\n", c1, c1 / max, (float)c1 * 100.0 / (float)c1);
  249. printf("method1 : completed in %lu cycles, %lu cycles per read, %0.2f %%.\n", c2, c2 / max, (float)c2 * 100.0 / (float)c1);
  250. return 0;
  251. }