Browse Source

Support legacy Prometheus metric names for source average (#5531)

* Support older prometheus metric unit naming and allow removal of units from metrics

* Update swagger

* Added bats tests, improved efficiency of checking units
Chris Akritidis 6 years ago
parent
commit
56ac9f5145

+ 8 - 0
backends/prometheus/README.md

@@ -373,6 +373,14 @@ netdata sends all metrics prefixed with `netdata_`. You can change this in `netd
 
 It can also be changed from the URL, by appending `&prefix=netdata`.
 
+### Metric Units
+
+The default source `average` adds the unit of measurement to the name of each metric (e.g. `_KiB_persec`).
+To hide the units and get the same metric names as with the other sources, append `&hideunits=yes` to the URL.
+
+The units were standardized in v1.12, which changed the metric names.
+To get the metric names as they were before v1.12, append `&oldunits=yes` to the URL.
+
 ### Accuracy of `average` and `sum` data sources
 
 When the data source is set to `average` or `sum`, netdata remembers the last access of each client accessing prometheus metrics and uses this last access time to respond with the `average` or `sum` of all the entries in the database since that. This means that prometheus servers are not losing data when they access netdata with data source = `average` or `sum`.

+ 42 - 3
backends/prometheus/backend_prometheus.c

@@ -78,11 +78,50 @@ static inline size_t prometheus_label_copy(char *d, const char *s, size_t usable
     return n;
 }
 
-static inline char *prometheus_units_copy(char *d, const char *s, size_t usable) {
+static inline char *prometheus_units_copy(char *d, const char *s, size_t usable, int showoldunits) {
     const char *sorig = s;
     char *ret = d;
     size_t n;
 
+    // Fix for issue 5227
+    if (unlikely(showoldunits)) {
+		static struct {
+			const char *newunit;
+			uint32_t hash;
+			const char *oldunit;
+		} units[] = {
+				  {"KiB/s", 0, "kilobytes/s"}
+				, {"MiB/s", 0, "MB/s"}
+				, {"GiB/s", 0, "GB/s"}
+				, {"KiB"  , 0, "KB"}
+				, {"MiB"  , 0, "MB"}
+				, {"GiB"  , 0, "GB"}
+				, {"inodes"       , 0, "Inodes"}
+				, {"percentage"   , 0, "percent"}
+				, {"faults/s"     , 0, "page faults/s"}
+				, {"KiB/operation", 0, "kilobytes per operation"}
+				, {"milliseconds/operation", 0, "ms per operation"}
+				, {NULL, 0, NULL}
+		};
+		static int initialized = 0;
+		int i;
+
+		if(unlikely(!initialized)) {
+			for (i = 0; units[i].newunit; i++)
+				units[i].hash = simple_hash(units[i].newunit);
+			initialized = 1;
+		}
+
+		uint32_t hash = simple_hash(s);
+		for(i = 0; units[i].newunit ; i++) {
+			if(unlikely(hash == units[i].hash && !strcmp(s, units[i].newunit))) {
+				// matched a standardized unit: continue with its legacy (pre-v1.12) name
+				s=units[i].oldunit;
+				sorig = s;
+				break;
+			}
+		}
+    }
     *d++ = '_';
     for(n = 1; *s && n < usable ; d++, s++, n++) {
         register char c = *s;
@@ -275,8 +314,8 @@ static void rrd_stats_api_v1_charts_allmetrics_prometheus(RRDHOST *host, BUFFER
                     homogeneus = 0;
             }
             else {
-                if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE)
-                    prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX);
+                if(BACKEND_OPTIONS_DATA_SOURCE(backend_options) == BACKEND_SOURCE_DATA_AVERAGE && !(output_options & PROMETHEUS_OUTPUT_HIDEUNITS))
+                    prometheus_units_copy(units, st->units, PROMETHEUS_ELEMENT_MAX, output_options & PROMETHEUS_OUTPUT_OLDUNITS);
             }
 
             if(unlikely(output_options & PROMETHEUS_OUTPUT_HELP))

+ 3 - 1
backends/prometheus/backend_prometheus.h

@@ -11,7 +11,9 @@ typedef enum prometheus_output_flags {
     PROMETHEUS_OUTPUT_TYPES      = (1 << 1),
     PROMETHEUS_OUTPUT_NAMES      = (1 << 2),
     PROMETHEUS_OUTPUT_TIMESTAMPS = (1 << 3),
-    PROMETHEUS_OUTPUT_VARIABLES  = (1 << 4)
+    PROMETHEUS_OUTPUT_VARIABLES  = (1 << 4),
+	PROMETHEUS_OUTPUT_OLDUNITS   = (1 << 5),
+	PROMETHEUS_OUTPUT_HIDEUNITS  = (1 << 6)
 } PROMETHEUS_OUTPUT_OPTIONS;
 
 extern void rrd_stats_api_v1_charts_allmetrics_prometheus_single_host(RRDHOST *host, BUFFER *wb, const char *server, const char *prefix, BACKEND_OPTIONS backend_options, PROMETHEUS_OUTPUT_OPTIONS output_options);

+ 92 - 0
tests/backends/prometheus-avg-oldunits.txt

@@ -0,0 +1,92 @@
+nd_cpu_core_throttling_events_persec_average
+nd_cpu_cpu_percent_average
+nd_cpu_interrupts_interrupts_persec_average
+nd_cpu_softirqs_softirqs_persec_average
+nd_cpu_softnet_stat_events_persec_average
+nd_disk_avgsz_kilobytes_per_operation_average
+nd_disk_await_ms_per_operation_average
+nd_disk_backlog_milliseconds_average
+nd_disk_inodes_Inodes_average
+nd_disk_io_kilobytes_persec_average
+nd_disk_iotime_milliseconds_persec_average
+nd_disk_mops_merged_operations_persec_average
+nd_disk_ops_operations_persec_average
+nd_disk_space_GB_average
+nd_disk_svctm_ms_per_operation_average
+nd_disk_util___of_time_working_average
+nd_ip_bcast_kilobits_persec_average
+nd_ip_bcastpkts_packets_persec_average
+nd_ip_ecnpkts_packets_persec_average
+nd_ip_inerrors_packets_persec_average
+nd_ip_mcast_kilobits_persec_average
+nd_ip_mcastpkts_packets_persec_average
+nd_ip_tcpconnaborts_connections_persec_average
+nd_ip_tcpofo_packets_persec_average
+nd_ip_tcpreorders_packets_persec_average
+nd_ipv4_errors_packets_persec_average
+nd_ipv4_icmp_errors_packets_persec_average
+nd_ipv4_icmpmsg_packets_persec_average
+nd_ipv4_icmp_packets_persec_average
+nd_ipv4_packets_packets_persec_average
+nd_ipv4_sockstat_sockets_sockets_average
+nd_ipv4_sockstat_tcp_mem_KB_average
+nd_ipv4_sockstat_tcp_sockets_sockets_average
+nd_ipv4_sockstat_udp_mem_KB_average
+nd_ipv4_sockstat_udp_sockets_sockets_average
+nd_ipv4_tcperrors_packets_persec_average
+nd_ipv4_tcphandshake_events_persec_average
+nd_ipv4_tcpopens_connections_persec_average
+nd_ipv4_tcppackets_packets_persec_average
+nd_ipv4_tcpsock_active_connections_average
+nd_ipv4_udperrors_events_persec_average
+nd_ipv4_udppackets_packets_persec_average
+nd_ipv6_ect_packets_persec_average
+nd_ipv6_errors_packets_persec_average
+nd_ipv6_icmpechos_messages_persec_average
+nd_ipv6_icmperrors_errors_persec_average
+nd_ipv6_icmp_messages_persec_average
+nd_ipv6_icmpmldv2_reports_persec_average
+nd_ipv6_icmpneighbor_messages_persec_average
+nd_ipv6_icmprouter_messages_persec_average
+nd_ipv6_icmptypes_messages_persec_average
+nd_ipv6_mcast_kilobits_persec_average
+nd_ipv6_mcastpkts_packets_persec_average
+nd_ipv6_packets_packets_persec_average
+nd_ipv6_sockstat6_raw_sockets_sockets_average
+nd_ipv6_sockstat6_tcp_sockets_sockets_average
+nd_ipv6_sockstat6_udp_sockets_sockets_average
+nd_ipv6_udperrors_events_persec_average
+nd_ipv6_udppackets_packets_persec_average
+nd_mem_available_MB_average
+nd_mem_committed_MB_average
+nd_mem_kernel_MB_average
+nd_mem_pgfaults_page_faults_persec_average
+nd_mem_slab_MB_average
+nd_mem_transparent_hugepages_MB_average
+nd_mem_writeback_MB_average
+nd_net_drops_drops_persec_average
+nd_net_net_kilobits_persec_average
+nd_net_packets_packets_persec_average
+nd_system_active_processes_processes_average
+nd_system_cpu_percent_average
+nd_system_ctxt_context_switches_persec_average
+nd_system_entropy_entropy_average
+nd_system_forks_processes_persec_average
+nd_system_idlejitter_microseconds_lost_persec_average
+nd_system_interrupts_interrupts_persec_average
+nd_system_intr_interrupts_persec_average
+nd_system_io_kilobytes_persec_average
+nd_system_ipc_semaphore_arrays_arrays_average
+nd_system_ipc_semaphores_semaphores_average
+nd_system_ip_kilobits_persec_average
+nd_system_ipv6_kilobits_persec_average
+nd_system_load_load_average
+nd_system_net_kilobits_persec_average
+nd_system_pgpgio_kilobytes_persec_average
+nd_system_processes_processes_average
+nd_system_ram_MB_average
+nd_system_softirqs_softirqs_persec_average
+nd_system_softnet_stat_events_persec_average
+nd_system_swapio_kilobytes_persec_average
+nd_system_swap_MB_average
+nd_system_uptime_seconds_average

+ 92 - 0
tests/backends/prometheus-avg.txt

@@ -0,0 +1,92 @@
+nd_cpu_core_throttling_events_persec_average
+nd_cpu_cpu_percentage_average
+nd_cpu_interrupts_interrupts_persec_average
+nd_cpu_softirqs_softirqs_persec_average
+nd_cpu_softnet_stat_events_persec_average
+nd_disk_avgsz_KiB_operation_average
+nd_disk_await_milliseconds_operation_average
+nd_disk_backlog_milliseconds_average
+nd_disk_inodes_inodes_average
+nd_disk_io_KiB_persec_average
+nd_disk_iotime_milliseconds_persec_average
+nd_disk_mops_merged_operations_persec_average
+nd_disk_ops_operations_persec_average
+nd_disk_space_GiB_average
+nd_disk_svctm_milliseconds_operation_average
+nd_disk_util___of_time_working_average
+nd_ip_bcast_kilobits_persec_average
+nd_ip_bcastpkts_packets_persec_average
+nd_ip_ecnpkts_packets_persec_average
+nd_ip_inerrors_packets_persec_average
+nd_ip_mcast_kilobits_persec_average
+nd_ip_mcastpkts_packets_persec_average
+nd_ip_tcpconnaborts_connections_persec_average
+nd_ip_tcpofo_packets_persec_average
+nd_ip_tcpreorders_packets_persec_average
+nd_ipv4_errors_packets_persec_average
+nd_ipv4_icmp_errors_packets_persec_average
+nd_ipv4_icmpmsg_packets_persec_average
+nd_ipv4_icmp_packets_persec_average
+nd_ipv4_packets_packets_persec_average
+nd_ipv4_sockstat_sockets_sockets_average
+nd_ipv4_sockstat_tcp_mem_KiB_average
+nd_ipv4_sockstat_tcp_sockets_sockets_average
+nd_ipv4_sockstat_udp_mem_KiB_average
+nd_ipv4_sockstat_udp_sockets_sockets_average
+nd_ipv4_tcperrors_packets_persec_average
+nd_ipv4_tcphandshake_events_persec_average
+nd_ipv4_tcpopens_connections_persec_average
+nd_ipv4_tcppackets_packets_persec_average
+nd_ipv4_tcpsock_active_connections_average
+nd_ipv4_udperrors_events_persec_average
+nd_ipv4_udppackets_packets_persec_average
+nd_ipv6_ect_packets_persec_average
+nd_ipv6_errors_packets_persec_average
+nd_ipv6_icmpechos_messages_persec_average
+nd_ipv6_icmperrors_errors_persec_average
+nd_ipv6_icmp_messages_persec_average
+nd_ipv6_icmpmldv2_reports_persec_average
+nd_ipv6_icmpneighbor_messages_persec_average
+nd_ipv6_icmprouter_messages_persec_average
+nd_ipv6_icmptypes_messages_persec_average
+nd_ipv6_mcast_kilobits_persec_average
+nd_ipv6_mcastpkts_packets_persec_average
+nd_ipv6_packets_packets_persec_average
+nd_ipv6_sockstat6_raw_sockets_sockets_average
+nd_ipv6_sockstat6_tcp_sockets_sockets_average
+nd_ipv6_sockstat6_udp_sockets_sockets_average
+nd_ipv6_udperrors_events_persec_average
+nd_ipv6_udppackets_packets_persec_average
+nd_mem_available_MiB_average
+nd_mem_committed_MiB_average
+nd_mem_kernel_MiB_average
+nd_mem_pgfaults_faults_persec_average
+nd_mem_slab_MiB_average
+nd_mem_transparent_hugepages_MiB_average
+nd_mem_writeback_MiB_average
+nd_net_drops_drops_persec_average
+nd_net_net_kilobits_persec_average
+nd_net_packets_packets_persec_average
+nd_system_active_processes_processes_average
+nd_system_cpu_percentage_average
+nd_system_ctxt_context_switches_persec_average
+nd_system_entropy_entropy_average
+nd_system_forks_processes_persec_average
+nd_system_idlejitter_microseconds_lost_persec_average
+nd_system_interrupts_interrupts_persec_average
+nd_system_intr_interrupts_persec_average
+nd_system_io_KiB_persec_average
+nd_system_ipc_semaphore_arrays_arrays_average
+nd_system_ipc_semaphores_semaphores_average
+nd_system_ip_kilobits_persec_average
+nd_system_ipv6_kilobits_persec_average
+nd_system_load_load_average
+nd_system_net_kilobits_persec_average
+nd_system_pgpgio_KiB_persec_average
+nd_system_processes_processes_average
+nd_system_ram_MiB_average
+nd_system_softirqs_softirqs_persec_average
+nd_system_softnet_stat_events_persec_average
+nd_system_swapio_KiB_persec_average
+nd_system_swap_MiB_average
+nd_system_uptime_seconds_average

+ 92 - 0
tests/backends/prometheus-raw.txt

@@ -0,0 +1,92 @@
+nd_cpu_core_throttling_total
+nd_cpu_cpu_total
+nd_cpu_interrupts_total
+nd_cpu_softirqs_total
+nd_cpu_softnet_stat_total
+nd_disk_avgsz
+nd_disk_await
+nd_disk_backlog_total
+nd_disk_inodes
+nd_disk_iotime_total
+nd_disk_io_total
+nd_disk_mops_total
+nd_disk_ops_total
+nd_disk_space
+nd_disk_svctm
+nd_disk_util_total
+nd_ip_bcastpkts_total
+nd_ip_bcast_total
+nd_ip_ecnpkts_total
+nd_ip_inerrors_total
+nd_ip_mcastpkts_total
+nd_ip_mcast_total
+nd_ip_tcpconnaborts_total
+nd_ip_tcpofo_total
+nd_ip_tcpreorders_total
+nd_ipv4_errors_total
+nd_ipv4_icmp_errors_total
+nd_ipv4_icmpmsg_total
+nd_ipv4_icmp_total
+nd_ipv4_packets_total
+nd_ipv4_sockstat_sockets
+nd_ipv4_sockstat_tcp_mem
+nd_ipv4_sockstat_tcp_sockets
+nd_ipv4_sockstat_udp_mem
+nd_ipv4_sockstat_udp_sockets
+nd_ipv4_tcperrors_total
+nd_ipv4_tcphandshake_total
+nd_ipv4_tcpopens_total
+nd_ipv4_tcppackets_total
+nd_ipv4_tcpsock
+nd_ipv4_udperrors_total
+nd_ipv4_udppackets_total
+nd_ipv6_ect_total
+nd_ipv6_errors_total
+nd_ipv6_icmpechos_total
+nd_ipv6_icmperrors_total
+nd_ipv6_icmpmldv2_total
+nd_ipv6_icmpneighbor_total
+nd_ipv6_icmprouter_total
+nd_ipv6_icmp_total
+nd_ipv6_icmptypes_total
+nd_ipv6_mcastpkts_total
+nd_ipv6_mcast_total
+nd_ipv6_packets_total
+nd_ipv6_sockstat6_raw_sockets
+nd_ipv6_sockstat6_tcp_sockets
+nd_ipv6_sockstat6_udp_sockets
+nd_ipv6_udperrors_total
+nd_ipv6_udppackets_total
+nd_mem_available
+nd_mem_committed
+nd_mem_kernel
+nd_mem_pgfaults_total
+nd_mem_slab
+nd_mem_transparent_hugepages
+nd_mem_writeback
+nd_net_drops_total
+nd_net_net_total
+nd_net_packets_total
+nd_system_active_processes
+nd_system_cpu_total
+nd_system_ctxt_total
+nd_system_entropy
+nd_system_forks_total
+nd_system_idlejitter
+nd_system_interrupts_total
+nd_system_intr_total
+nd_system_io_total
+nd_system_ipc_semaphore_arrays
+nd_system_ipc_semaphores
+nd_system_ip_total
+nd_system_ipv6_total
+nd_system_load
+nd_system_net_total
+nd_system_pgpgio_total
+nd_system_processes
+nd_system_ram
+nd_system_softirqs_total
+nd_system_softnet_stat_total
+nd_system_swap
+nd_system_swapio_total
+nd_system_uptime

+ 31 - 0
tests/backends/prometheus.bats

@@ -0,0 +1,31 @@
+#!/usr/bin/env bats
+
+validate_metrics() {	# diff the metric names served by the local agent against tests/backends/<fname>
+	fname="${1}"	# name of the expected-output file under tests/backends/
+	params="${2}"	# extra query-string parameters appended to the request, e.g. "&data=raw"
+
+	curl -sS "http://localhost:19999/api/v1/allmetrics?format=prometheus&prefix=nd&timestamps=no${params}" |
+	grep -E 'nd_system_|nd_cpu_|nd_net_|nd_disk_|nd_ip_|nd_ipv4_|nd_ipv6_|nd_mem_' |
+	sed -ne 's/{.*//p' | sort | uniq > "tests/backends/new-${fname}"	# keep only the text before '{', de-duplicated
+	diff "tests/backends/${fname}" "tests/backends/new-${fname}"
+	rm "tests/backends/new-${fname}"
+}
+
+
+if [ ! -f .gitignore ];	then
+	echo "Need to run as ./tests/backends/$(basename "$0") from top level directory of git repository" >&2
+	exit 1
+fi
+
+
+@test "prometheus raw" {
+	validate_metrics prometheus-raw.txt "&data=raw"
+}
+
+@test "prometheus avg" {
+	validate_metrics prometheus-avg.txt ""
+}
+
+@test "prometheus avg oldunits" {
+	validate_metrics prometheus-avg-oldunits.txt "&oldunits=yes"
+}

+ 2 - 1
web/api/exporters/allmetrics.c

@@ -11,7 +11,8 @@ struct prometheus_output_options {
         { "names",      PROMETHEUS_OUTPUT_NAMES      },
         { "timestamps", PROMETHEUS_OUTPUT_TIMESTAMPS },
         { "variables",  PROMETHEUS_OUTPUT_VARIABLES  },
-
+        { "oldunits",   PROMETHEUS_OUTPUT_OLDUNITS   },
+        { "hideunits",  PROMETHEUS_OUTPUT_HIDEUNITS  },
         // terminator
         { NULL, PROMETHEUS_OUTPUT_NONE },
 };

+ 24 - 0
web/api/netdata-swagger.json

@@ -506,6 +506,30 @@
             ],
             "default": "yes"
           },
+          {
+            "name": "oldunits",
+            "in": "query",
+            "description": "When enabled, netdata will show metric names for the default source=average as they appeared before 1.12, by using the legacy unit naming conventions",
+            "required": false,
+            "type": "string",
+            "enum": [
+              "yes",
+              "no"
+            ],
+            "default": "no"
+          },
+          {
+            "name": "hideunits",
+            "in": "query",
+            "description": "When enabled, netdata will not include the units in the metric names, for the default source=average.",
+            "required": false,
+            "type": "string",
+            "enum": [
+              "yes",
+              "no"
+            ],
+            "default": "no"
+          },
           {
             "name": "server",
             "in": "query",

+ 14 - 0
web/api/netdata-swagger.yaml

@@ -333,6 +333,20 @@ paths:
           type: string
           enum: [ 'yes', 'no' ]
           default: 'yes'
+        - name: oldunits
+          in: query
+          description: 'When enabled, netdata will show metric names for the default source=average as they appeared before 1.12, by using the legacy unit naming conventions'
+          required: false
+          type: string
+          enum: [ 'yes', 'no' ]
+          default: 'no'
+        - name: hideunits
+          in: query
+          description: 'When enabled, netdata will not include the units in the metric names, for the default source=average.'
+          required: false
+          type: string
+          enum: [ 'yes', 'no' ]
+          default: 'no'
         - name: server
           in: query
           description: 'Set a distinct name of the client querying prometheus metrics. Netdata will use the client IP if this is not set.'