Browse Source

Speed up BUFFER increases (minimize reallocs) (#12792)

* speedup BUFFER increases by forward looking reallocs

* implemented buffer_vsprintf() and optimized buffer_sprintf() to minimize calls to vsnprintfz()

* optimize json generation for well known strings
Costa Tsaousis 2 years ago
parent
commit
47fa3d7089
3 changed files with 114 additions and 56 deletions
  1. 48 19
      libnetdata/buffer/buffer.c
  2. 1 0
      libnetdata/buffer/buffer.h
  3. 65 37
      web/api/formatters/json/json.c

+ 48 - 19
libnetdata/buffer/buffer.c

@@ -136,6 +136,24 @@ void buffer_print_llu(BUFFER *wb, unsigned long long uvalue)
     wb->len += wstr - str;
 }
 
+void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len) {
+    if(unlikely(!txt || !*txt)) return;
+
+    buffer_need_bytes(wb, len + 1);
+
+    char *s = &wb->buffer[wb->len];
+    const char *end = &txt[len + 1];
+
+    while(txt != end)
+        *s++ = *txt++;
+
+    wb->len += len;
+
+    // keep it NULL terminating
+    // not counting it at wb->len
+    wb->buffer[wb->len] = '\0';
+}
+
 void buffer_strcat(BUFFER *wb, const char *txt)
 {
     // buffer_sprintf(wb, "%s", txt);
@@ -159,8 +177,7 @@ void buffer_strcat(BUFFER *wb, const char *txt)
     if(*txt) {
         debug(D_WEB_BUFFER, "strcat(): increasing web_buffer at position %zu, size = %zu\n", wb->len, wb->size);
         len = strlen(txt);
-        buffer_increase(wb, len);
-        buffer_strcat(wb, txt);
+        buffer_fast_strcat(wb, txt, len);
     }
     else {
         // terminate the string
@@ -236,15 +253,23 @@ void buffer_vsprintf(BUFFER *wb, const char *fmt, va_list args)
 {
     if(unlikely(!fmt || !*fmt)) return;
 
-    buffer_need_bytes(wb, 2);
+    size_t wrote = 0, need = 2, space_remaining = 0;
 
-    size_t len = wb->size - wb->len - 1;
+    do {
+        need += space_remaining * 2;
 
-    wb->len += vsnprintfz(&wb->buffer[wb->len], len, fmt, args);
+        debug(D_WEB_BUFFER, "web_buffer_sprintf(): increasing web_buffer at position %zu, size = %zu, by %zu bytes (wrote = %zu)\n", wb->len, wb->size, need, wrote);
+        buffer_need_bytes(wb, need);
 
-    buffer_overflow_check(wb);
+        space_remaining = wb->size - wb->len - 1;
 
-    // the buffer is \0 terminated by vsnprintfz
+        wrote = (size_t) vsnprintfz(&wb->buffer[wb->len], space_remaining, fmt, args);
+
+    } while(wrote >= space_remaining);
+
+    wb->len += wrote;
+
+    // the buffer is \0 terminated by vsnprintf
 }
 
 void buffer_sprintf(BUFFER *wb, const char *fmt, ...)
@@ -252,22 +277,21 @@ void buffer_sprintf(BUFFER *wb, const char *fmt, ...)
     if(unlikely(!fmt || !*fmt)) return;
 
     va_list args;
-    size_t wrote = 0, need = 2, multiplier = 0, len;
+    size_t wrote = 0, need = 2, space_remaining = 0;
 
     do {
-        need += wrote + multiplier * WEB_DATA_LENGTH_INCREASE_STEP;
-        multiplier++;
+        need += space_remaining * 2;
 
         debug(D_WEB_BUFFER, "web_buffer_sprintf(): increasing web_buffer at position %zu, size = %zu, by %zu bytes (wrote = %zu)\n", wb->len, wb->size, need, wrote);
         buffer_need_bytes(wb, need);
 
-        len = wb->size - wb->len - 1;
+        space_remaining = wb->size - wb->len - 1;
 
         va_start(args, fmt);
-        wrote = (size_t) vsnprintfz(&wb->buffer[wb->len], len, fmt, args);
+        wrote = (size_t) vsnprintfz(&wb->buffer[wb->len], space_remaining, fmt, args);
         va_end(args);
 
-    } while(wrote >= len);
+    } while(wrote >= space_remaining);
 
     wb->len += wrote;
 
@@ -420,16 +444,21 @@ void buffer_increase(BUFFER *b, size_t free_size_required) {
     buffer_overflow_check(b);
 
     size_t left = b->size - b->len;
-
     if(left >= free_size_required) return;
 
-    size_t increase = free_size_required - left;
-    if(increase < WEB_DATA_LENGTH_INCREASE_STEP) increase = WEB_DATA_LENGTH_INCREASE_STEP;
+    size_t wanted = free_size_required - left;
+    size_t minimum = WEB_DATA_LENGTH_INCREASE_STEP;
+    if(minimum > wanted) wanted = minimum;
+
+    size_t optimal = b->size;
+    if(b->size > 5*1024*1024) optimal = b->size / 2;
+
+    if(optimal > wanted) wanted = optimal;
 
-    debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", b->size, b->size + increase);
+    debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", b->size, b->size + wanted);
 
-    b->buffer = reallocz(b->buffer, b->size + increase + sizeof(BUFFER_OVERFLOW_EOF) + 2);
-    b->size += increase;
+    b->buffer = reallocz(b->buffer, b->size + wanted + sizeof(BUFFER_OVERFLOW_EOF) + 2);
+    b->size += wanted;
 
     buffer_overflow_init(b);
     buffer_overflow_check(b);

+ 1 - 0
libnetdata/buffer/buffer.h

@@ -55,6 +55,7 @@ extern const char *buffer_tostring(BUFFER *wb);
 extern void buffer_reset(BUFFER *wb);
 
 extern void buffer_strcat(BUFFER *wb, const char *txt);
+extern void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len);
 extern void buffer_rrd_value(BUFFER *wb, calculated_number value);
 
 extern void buffer_date(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds);

+ 65 - 37
web/api/formatters/json/json.c

@@ -16,7 +16,7 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
 
     //info("RRD2JSON(): %s: BEGIN", r->st->id);
     int row_annotations = 0, dates, dates_with_new = 0;
-    char kq[2] = "",                    // key quote
+    char kq[2] = "",                        // key quote
             sq[2] = "",                     // string quote
             pre_label[101] = "",            // before each label
             post_label[101] = "",           // after each label
@@ -28,7 +28,8 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
             normal_annotation[201] = "",    // default row annotation
             overflow_annotation[201] = "",  // overflow row annotation
             data_begin[101] = "",           // between labels and values
-            finish[101] = "";               // at the end of everything
+            finish[101] = "",               // at the end of everything
+            object_rows_time[101] = "";
 
     if(datatable) {
         dates = JSON_DATES_JS;
@@ -49,7 +50,7 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
         strcpy(post_value,         "}");
         strcpy(post_line,          "]}");
         snprintfz(data_begin, 100, "\n  ],\n    %srows%s:\n [\n", kq, kq);
-        strcpy(finish,             "\n  ]\n}");
+        strcpy(finish,             "\n]\n}");
 
         snprintfz(overflow_annotation, 200, ",{%sv%s:%sRESET OR OVERFLOW%s},{%sv%s:%sThe counters have been wrapped.%s}", kq, kq, sq, sq, kq, kq, sq, sq);
         snprintfz(normal_annotation,   200, ",{%sv%s:null},{%sv%s:null}", kq, kq, kq, kq);
@@ -76,23 +77,38 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
             dates_with_new = 0;
         }
         if( options & RRDR_OPTION_OBJECTSROWS )
-            strcpy(pre_date, "      { ");
+            strcpy(pre_date, " { ");
         else
-            strcpy(pre_date, "      [ ");
-        strcpy(pre_label,  ", \"");
+            strcpy(pre_date, " [ ");
+        strcpy(pre_label,  ",\"");
         strcpy(post_label, "\"");
-        strcpy(pre_value,  ", ");
+        strcpy(pre_value,  ",");
         if( options & RRDR_OPTION_OBJECTSROWS )
             strcpy(post_line, "}");
         else
             strcpy(post_line, "]");
         snprintfz(data_begin, 100, "],\n    %sdata%s:\n [\n", kq, kq);
-        strcpy(finish,             "\n  ]\n}");
+        strcpy(finish,             "\n]\n}");
 
         buffer_sprintf(wb, "{\n %slabels%s: [", kq, kq);
         buffer_sprintf(wb, "%stime%s", sq, sq);
+
+        if( options & RRDR_OPTION_OBJECTSROWS )
+            snprintfz(object_rows_time, 100, "%stime%s: ", kq, kq);
+
     }
 
+    size_t pre_value_len = strlen(pre_value);
+    size_t post_value_len = strlen(post_value);
+    size_t pre_label_len = strlen(pre_label);
+    size_t post_label_len = strlen(post_label);
+    size_t pre_date_len = strlen(pre_date);
+    size_t post_date_len = strlen(post_date);
+    size_t post_line_len = strlen(post_line);
+    size_t normal_annotation_len = strlen(normal_annotation);
+    size_t overflow_annotation_len = strlen(overflow_annotation);
+    size_t object_rows_time_len = strlen(object_rows_time);
+
     // -------------------------------------------------------------------------
     // print the JSON header
 
@@ -104,18 +120,19 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
         if(unlikely(r->od[c] & RRDR_DIMENSION_HIDDEN)) continue;
         if(unlikely((options & RRDR_OPTION_NONZERO) && !(r->od[c] & RRDR_DIMENSION_NONZERO))) continue;
 
-        buffer_strcat(wb, pre_label);
+        buffer_fast_strcat(wb, pre_label, pre_label_len);
         buffer_strcat(wb, rd->name);
 //        buffer_strcat(wb, ".");
 //        buffer_strcat(wb, rd->rrdset->name);
-        buffer_strcat(wb, post_label);
+        buffer_fast_strcat(wb, post_label, post_label_len);
         i++;
     }
     if(!i) {
-        buffer_strcat(wb, pre_label);
-        buffer_strcat(wb, "no data");
-        buffer_strcat(wb, post_label);
+        buffer_fast_strcat(wb, pre_label, pre_label_len);
+        buffer_fast_strcat(wb, "no data", 7);
+        buffer_fast_strcat(wb, post_label, post_label_len);
     }
+    size_t total_number_of_dimensions = i;
 
     // print the begin of row data
     buffer_strcat(wb, data_begin);
@@ -133,6 +150,13 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
         step = -1;
     }
 
+    // pre-allocate a large enough buffer for us
+    // this does not need to be accurate - it is just a hint to avoid multiple realloc().
+    buffer_need_bytes(wb,
+                      ( 20 * rrdr_rows(r)) // timestamp + json overhead
+                    + ( (pre_value_len + post_value_len + 4) * total_number_of_dimensions * rrdr_rows(r) ) // number
+                      );
+
     // for each line in the array
     calculated_number total = 1;
     for(i = start; i != end ;i += step) {
@@ -146,48 +170,52 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
             struct tm tmbuf, *tm = localtime_r(&now, &tmbuf);
             if(!tm) { error("localtime_r() failed."); continue; }
 
-            if(likely(i != start)) buffer_strcat(wb, ",\n");
-            buffer_strcat(wb, pre_date);
+            if(likely(i != start)) buffer_fast_strcat(wb, ",\n", 2);
+            buffer_fast_strcat(wb, pre_date, pre_date_len);
 
             if( options & RRDR_OPTION_OBJECTSROWS )
-                buffer_sprintf(wb, "%stime%s: ", kq, kq);
+                buffer_fast_strcat(wb, object_rows_time, object_rows_time_len);
 
-            if(dates_with_new)
-                buffer_strcat(wb, "new ");
+            if(unlikely(dates_with_new))
+                buffer_fast_strcat(wb, "new ", 4);
 
             buffer_jsdate(wb, tm->tm_year + 1900, tm->tm_mon, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec);
 
-            buffer_strcat(wb, post_date);
+            buffer_fast_strcat(wb, post_date, post_date_len);
 
-            if(row_annotations) {
+            if(unlikely(row_annotations)) {
                 // google supports one annotation per row
                 int annotation_found = 0;
                 for(c = 0, rd = temp_rd?temp_rd:r->st->dimensions; rd ;c++, rd = rd->next) {
                     if(unlikely(!(r->od[c] & RRDR_DIMENSION_SELECTED))) continue;
 
-                    if(co[c] & RRDR_VALUE_RESET) {
-                        buffer_strcat(wb, overflow_annotation);
+                    if(unlikely(co[c] & RRDR_VALUE_RESET)) {
+                        buffer_fast_strcat(wb, overflow_annotation, overflow_annotation_len);
                         annotation_found = 1;
                         break;
                     }
                 }
-                if(!annotation_found)
-                    buffer_strcat(wb, normal_annotation);
+                if(likely(!annotation_found))
+                    buffer_fast_strcat(wb, normal_annotation, normal_annotation_len);
             }
         }
         else {
             // print the timestamp of the line
-            if(likely(i != start)) buffer_strcat(wb, ",\n");
-            buffer_strcat(wb, pre_date);
+            if(likely(i != start))
+                buffer_fast_strcat(wb, ",\n", 2);
 
-            if( options & RRDR_OPTION_OBJECTSROWS )
-                buffer_sprintf(wb, "%stime%s: ", kq, kq);
+            buffer_fast_strcat(wb, pre_date, pre_date_len);
+
+            if(unlikely( options & RRDR_OPTION_OBJECTSROWS ))
+                buffer_fast_strcat(wb, object_rows_time, object_rows_time_len);
 
             buffer_rrd_value(wb, (calculated_number)r->t[i]);
+
             // in ms
-            if(options & RRDR_OPTION_MILLISECONDS) buffer_strcat(wb, "000");
+            if(unlikely(options & RRDR_OPTION_MILLISECONDS))
+                buffer_fast_strcat(wb, "000", 3);
 
-            buffer_strcat(wb, post_date);
+            buffer_fast_strcat(wb, post_date, post_date_len);
         }
 
         int set_min_max = 0;
@@ -213,16 +241,16 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
 
             calculated_number n = cn[c];
 
-            buffer_strcat(wb, pre_value);
+            buffer_fast_strcat(wb, pre_value, pre_value_len);
 
-            if( options & RRDR_OPTION_OBJECTSROWS )
+            if(unlikely( options & RRDR_OPTION_OBJECTSROWS ))
                 buffer_sprintf(wb, "%s%s%s: ", kq, rd->name, kq);
 
             if(co[c] & RRDR_VALUE_EMPTY) {
-                if(options & RRDR_OPTION_NULL2ZERO)
-                    buffer_strcat(wb, "0");
+                if(unlikely(options & RRDR_OPTION_NULL2ZERO))
+                    buffer_fast_strcat(wb, "0", 1);
                 else
-                    buffer_strcat(wb, "null");
+                    buffer_fast_strcat(wb, "null", 4);
             }
             else {
                 if(unlikely((options & RRDR_OPTION_ABSOLUTE) && n < 0))
@@ -243,10 +271,10 @@ void rrdr2json(RRDR *r, BUFFER *wb, RRDR_OPTIONS options, int datatable,  struct
                 buffer_rrd_value(wb, n);
             }
 
-            buffer_strcat(wb, post_value);
+            buffer_fast_strcat(wb, post_value, post_value_len);
         }
 
-        buffer_strcat(wb, post_line);
+        buffer_fast_strcat(wb, post_line, post_line_len);
     }
 
     buffer_strcat(wb, finish);