4 months ago · a5e5e2bf0b
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3096,7 +3096,6 @@ if(ENABLE_PLUGIN_PYTHON)
 
				           src/collectors/python.d.plugin/haproxy/haproxy.conf
			
 
				           src/collectors/python.d.plugin/pandas/pandas.conf
			
 
				           src/collectors/python.d.plugin/traefik/traefik.conf
			
 
				-          src/collectors/python.d.plugin/zscores/zscores.conf
			
 
				           COMPONENT plugin-pythond
			
 
				           DESTINATION usr/lib/netdata/conf.d/python.d)
			
 
				 
			
@@ -3106,7 +3105,6 @@ if(ENABLE_PLUGIN_PYTHON)
 
				           src/collectors/python.d.plugin/haproxy/haproxy.chart.py
			
 
				           src/collectors/python.d.plugin/pandas/pandas.chart.py
			
 
				           src/collectors/python.d.plugin/traefik/traefik.chart.py
			
 
				-          src/collectors/python.d.plugin/zscores/zscores.chart.py
			
 
				           COMPONENT plugin-pythond
			
 
				           DESTINATION usr/libexec/netdata/python.d)
			
 
				 
			
--- a/src/collectors/python.d.plugin/python.d.conf
+++ b/src/collectors/python.d.plugin/python.d.conf
@@ -34,7 +34,6 @@ go_expvar: no
 
				 # smartd_log: yes
			
 
				 # traefik: yes
			
 
				 # varnish: yes
			
 
				-# zscores: no
			
 
				 
			
 
				 
			
 
				 ## Disabled for existing installations.
			
--- a/src/collectors/python.d.plugin/zscores/README.md
+++ b/src/collectors/python.d.plugin/zscores/README.md
@@ -1 +0,0 @@
 
				-integrations/python.d_zscores.md
			
--- a/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md
+++ b/src/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md
@@ -1,229 +0,0 @@
 
				-<!--startmeta
			
 
				-custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/zscores/README.md"
			
 
				-meta_yaml: "https://github.com/netdata/netdata/edit/master/src/collectors/python.d.plugin/zscores/metadata.yaml"
			
 
				-sidebar_label: "python.d zscores"
			
 
				-learn_status: "Published"
			
 
				-learn_rel_path: "Collecting Metrics/Other"
			
 
				-most_popular: False
			
 
				-message: "DO NOT EDIT THIS FILE DIRECTLY, IT IS GENERATED BY THE COLLECTOR'S metadata.yaml FILE"
			
 
				-endmeta-->
			
 
				-
			
 
				-# python.d zscores
			
 
				-
			
 
				-Plugin: python.d.plugin
			
 
				-Module: zscores
			
 
				-
			
 
				-<img src="https://img.shields.io/badge/maintained%20by-Netdata-%2300ab44" />
			
 
				-
			
 
				-## Overview
			
 
				-
			
 
				-By using smoothed, rolling [Z-Scores](https://en.wikipedia.org/wiki/Standard_score) for selected metrics or charts you can narrow down your focus and shorten root cause analysis.
			
 
				-
			
 
				-
			
 
				-This collector uses the [Netdata rest api](https://github.com/netdata/netdata/blob/master/src/web/api/README.md) to get the `mean` and `stddev`
			
 
				-for each dimension on specified charts over a time range (defined by `train_secs` and `offset_secs`).
			
 
				-
			
 
				-For each dimension it will calculate a Z-Score as `z = (x - mean) / stddev` (clipped at `z_clip`). Scores are then smoothed over
			
 
				-time (`z_smooth_n`) and, if `mode: 'per_chart'`, aggregated across dimensions to a smoothed, rolling chart level Z-Score at each time step.
			
 
				-
			
 
				-
			
 
				-This collector is supported on all platforms.
			
 
				-
			
 
				-This collector supports collecting metrics from multiple instances of this integration, including remote instances.
			
 
				-
			
 
				-
			
 
				-### Default Behavior
			
 
				-
			
 
				-#### Auto-Detection
			
 
				-
			
 
				-This integration doesn't support auto-detection.
			
 
				-
			
 
				-#### Limits
			
 
				-
			
 
				-The default configuration for this integration does not impose any limits on data collection.
			
 
				-
			
 
				-#### Performance Impact
			
 
				-
			
 
				-The default configuration for this integration is not expected to impose a significant performance impact on the system.
			
 
				-
			
 
				-
			
 
				-## Metrics
			
 
				-
			
 
				-Metrics grouped by *scope*.
			
 
				-
			
 
				-The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
			
 
				-
			
 
				-
			
 
				-
			
 
				-### Per python.d zscores instance
			
 
				-
			
 
				-These metrics refer to the entire monitored application.
			
 
				-
			
 
				-This scope has no labels.
			
 
				-
			
 
				-Metrics:
			
 
				-
			
 
				-| Metric | Dimensions | Unit |
			
 
				-|:------|:----------|:----|
			
 
				-| zscores.z | a dimension per chart or dimension | z |
			
 
				-| zscores.3stddev | a dimension per chart or dimension | count |
			
 
				-
			
 
				-
			
 
				-
			
 
				-## Alerts
			
 
				-
			
 
				-There are no alerts configured by default for this integration.
			
 
				-
			
 
				-
			
 
				-## Setup
			
 
				-
			
 
				-### Prerequisites
			
 
				-
			
 
				-#### Python Requirements
			
 
				-
			
 
				-This collector will only work with Python 3 and requires the below packages be installed.
			
 
				-
			
 
				-```bash
			
 
				-# become netdata user
			
 
				-sudo su -s /bin/bash netdata
			
 
				-# install required packages
			
 
				-pip3 install numpy pandas requests netdata-pandas==0.0.38
			
 
				-```
			
 
				-
			
 
				-
			
 
				-
			
 
				-### Configuration
			
 
				-
			
 
				-#### File
			
 
				-
			
 
				-The configuration file name for this integration is `python.d/zscores.conf`.
			
 
				-
			
 
				-
			
 
				-You can edit the configuration file using the [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#edit-a-configuration-file-using-edit-config) script from the
			
 
				-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/netdata-agent/configuration/README.md#the-netdata-config-directory).
			
 
				-
			
 
				-```bash
			
 
				-cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
			
 
				-sudo ./edit-config python.d/zscores.conf
			
 
				-```
			
 
				-#### Options
			
 
				-
			
 
				-There are 2 sections:
			
 
				-
			
 
				-* Global variables
			
 
				-* One or more JOBS that can define multiple different instances to monitor.
			
 
				-
			
 
				-The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
			
 
				-
			
 
				-Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
			
 
				-
			
 
				-Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
			
 
				-
			
 
				-
			
 
				-<details open><summary>Config options</summary>
			
 
				-
			
 
				-| Name | Description | Default | Required |
			
 
				-|:----|:-----------|:-------|:--------:|
			
 
				-| charts_regex | what charts to pull data for - A regex like `system\..*/` or `system\..*/apps.cpu/apps.mem` etc. | system\..* | yes |
			
 
				-| train_secs | length of time (in seconds) to base calculations off for mean and stddev. | 14400 | yes |
			
 
				-| offset_secs | offset (in seconds) preceding latest data to ignore when calculating mean and stddev. | 300 | yes |
			
 
				-| train_every_n | recalculate the mean and stddev every n steps of the collector. | 900 | yes |
			
 
				-| z_smooth_n | smooth the z score (to reduce sensitivity to spikes) by averaging it over last n values. | 15 | yes |
			
 
				-| z_clip | cap absolute value of zscore (before smoothing) for better stability. | 10 | yes |
			
 
				-| z_abs | set z_abs: 'true' to make all zscores be absolute values only. | true | yes |
			
 
				-| burn_in | burn in period in which to initially calculate mean and stddev on every step. | 2 | yes |
			
 
				-| mode | mode can be to get a zscore 'per_dim' or 'per_chart'. | per_chart | yes |
			
 
				-| per_chart_agg | per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'. | mean | yes |
			
 
				-| update_every | Sets the default data collection frequency. | 5 | no |
			
 
				-| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
			
 
				-| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
			
 
				-| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
			
 
				-
			
 
				-</details>
			
 
				-
			
 
				-#### Examples
			
 
				-
			
 
				-##### Default
			
 
				-
			
 
				-Default configuration.
			
 
				-
			
 
				-```yaml
			
 
				-local:
			
 
				-  name: 'local'
			
 
				-  host: '127.0.0.1:19999'
			
 
				-  charts_regex: 'system\..*'
			
 
				-  charts_to_exclude: 'system.uptime'
			
 
				-  train_secs: 14400
			
 
				-  offset_secs: 300
			
 
				-  train_every_n: 900
			
 
				-  z_smooth_n: 15
			
 
				-  z_clip: 10
			
 
				-  z_abs: 'true'
			
 
				-  burn_in: 2
			
 
				-  mode: 'per_chart'
			
 
				-  per_chart_agg: 'mean'
			
 
				-
			
 
				-```
			
 
				-
			
 
				-
			
 
				-## Troubleshooting
			
 
				-
			
 
				-### Debug Mode
			
 
				-
			
 
				-
			
 
				-To troubleshoot issues with the `zscores` collector, run the `python.d.plugin` with the debug option enabled. The output
			
 
				-should give you clues as to why the collector isn't working.
			
 
				-
			
 
				-- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
			
 
				-  your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
			
 
				-
			
 
				-  ```bash
			
 
				-  cd /usr/libexec/netdata/plugins.d/
			
 
				-  ```
			
 
				-
			
 
				-- Switch to the `netdata` user.
			
 
				-
			
 
				-  ```bash
			
 
				-  sudo -u netdata -s
			
 
				-  ```
			
 
				-
			
 
				-- Run the `python.d.plugin` to debug the collector:
			
 
				-
			
 
				-  ```bash
			
 
				-  ./python.d.plugin zscores debug trace
			
 
				-  ```
			
 
				-
			
 
				-### Getting Logs
			
 
				-
			
 
				-If you're encountering problems with the `zscores` collector, follow these steps to retrieve logs and identify potential issues:
			
 
				-
			
 
				-- **Run the command** specific to your system (systemd, non-systemd, or Docker container).
			
 
				-- **Examine the output** for any warnings or error messages that might indicate issues.  These messages should provide clues about the root cause of the problem.
			
 
				-
			
 
				-#### System with systemd
			
 
				-
			
 
				-Use the following command to view logs generated since the last Netdata service restart:
			
 
				-
			
 
				-```bash
			
 
				-journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata --grep zscores
			
 
				-```
			
 
				-
			
 
				-#### System without systemd
			
 
				-
			
 
				-Locate the collector log file, typically at `/var/log/netdata/collector.log`, and use `grep` to filter for collector's name:
			
 
				-
			
 
				-```bash
			
 
				-grep zscores /var/log/netdata/collector.log
			
 
				-```
			
 
				-
			
 
				-**Note**: This method shows logs from all restarts. Focus on the **latest entries** for troubleshooting current issues.
			
 
				-
			
 
				-#### Docker Container
			
 
				-
			
 
				-If your Netdata runs in a Docker container named "netdata" (replace if different), use this command:
			
 
				-
			
 
				-```bash
			
 
				-docker logs netdata 2>&1 | grep zscores
			
 
				-```
			
 
				-
			
 
				-
			
--- a/src/collectors/python.d.plugin/zscores/metadata.yaml
+++ b/src/collectors/python.d.plugin/zscores/metadata.yaml
@@ -1,187 +0,0 @@
 
				-plugin_name: python.d.plugin
			
 
				-modules:
			
 
				-  - meta:
			
 
				-      plugin_name: python.d.plugin
			
 
				-      module_name: zscores
			
 
				-      monitored_instance:
			
 
				-        name: python.d zscores
			
 
				-        link: https://en.wikipedia.org/wiki/Standard_score
			
 
				-        categories:
			
 
				-          - data-collection.other
			
 
				-        icon_filename: ""
			
 
				-      related_resources:
			
 
				-        integrations:
			
 
				-          list: []
			
 
				-      info_provided_to_referring_integrations:
			
 
				-        description: ""
			
 
				-      keywords:
			
 
				-        - zscore
			
 
				-        - z-score
			
 
				-        - standard score
			
 
				-        - standard deviation
			
 
				-        - anomaly detection
			
 
				-        - statistical anomaly detection
			
 
				-      most_popular: false
			
 
				-    overview:
			
 
				-      data_collection:
			
 
				-        metrics_description: |
			
 
				-          By using smoothed, rolling [Z-Scores](https://en.wikipedia.org/wiki/Standard_score) for selected metrics or charts you can narrow down your focus and shorten root cause analysis.
			
 
				-        method_description: |
			
 
				-          This collector uses the [Netdata rest api](/src/web/api/README.md) to get the `mean` and `stddev`
			
 
				-          for each dimension on specified charts over a time range (defined by `train_secs` and `offset_secs`).
			
 
				-          
			
 
				-          For each dimension it will calculate a Z-Score as `z = (x - mean) / stddev` (clipped at `z_clip`). Scores are then smoothed over
			
 
				-          time (`z_smooth_n`) and, if `mode: 'per_chart'`, aggregated across dimensions to a smoothed, rolling chart level Z-Score at each time step.
			
 
				-      supported_platforms:
			
 
				-        include: []
			
 
				-        exclude: []
			
 
				-      multi_instance: true
			
 
				-      additional_permissions:
			
 
				-        description: ""
			
 
				-      default_behavior:
			
 
				-        auto_detection:
			
 
				-          description: ""
			
 
				-        limits:
			
 
				-          description: ""
			
 
				-        performance_impact:
			
 
				-          description: ""
			
 
				-    setup:
			
 
				-      prerequisites:
			
 
				-        list:
			
 
				-          - title: Python Requirements
			
 
				-            description: |
			
 
				-              This collector will only work with Python 3 and requires the below packages be installed.
			
 
				-              
			
 
				-              ```bash
			
 
				-              # become netdata user
			
 
				-              sudo su -s /bin/bash netdata
			
 
				-              # install required packages
			
 
				-              pip3 install numpy pandas requests netdata-pandas==0.0.38
			
 
				-              ```
			
 
				-      configuration:
			
 
				-        file:
			
 
				-          name: python.d/zscores.conf
			
 
				-          description: ""
			
 
				-        options:
			
 
				-          description: |
			
 
				-            There are 2 sections:
			
 
				-            
			
 
				-            * Global variables
			
 
				-            * One or more JOBS that can define multiple different instances to monitor.
			
 
				-            
			
 
				-            The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
			
 
				-            
			
 
				-            Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
			
 
				-            
			
 
				-            Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
			
 
				-          folding:
			
 
				-            title: "Config options"
			
 
				-            enabled: true
			
 
				-          list:
			
 
				-            - name: charts_regex
			
 
				-              description: what charts to pull data for - A regex like `system\..*|` or `system\..*|apps.cpu|apps.mem` etc.
			
 
				-              default_value: "system\\..*"
			
 
				-              required: true
			
 
				-            - name: train_secs
			
 
				-              description: length of time (in seconds) to base calculations off for mean and stddev.
			
 
				-              default_value: 14400
			
 
				-              required: true
			
 
				-            - name: offset_secs
			
 
				-              description: offset (in seconds) preceding latest data to ignore when calculating mean and stddev.
			
 
				-              default_value: 300
			
 
				-              required: true
			
 
				-            - name: train_every_n
			
 
				-              description: recalculate the mean and stddev every n steps of the collector.
			
 
				-              default_value: 900
			
 
				-              required: true
			
 
				-            - name: z_smooth_n
			
 
				-              description: smooth the z score (to reduce sensitivity to spikes) by averaging it over last n values.
			
 
				-              default_value: 15
			
 
				-              required: true
			
 
				-            - name: z_clip
			
 
				-              description: cap absolute value of zscore (before smoothing) for better stability.
			
 
				-              default_value: 10
			
 
				-              required: true
			
 
				-            - name: z_abs
			
 
				-              description: "set z_abs: 'true' to make all zscores be absolute values only."
			
 
				-              default_value: "true"
			
 
				-              required: true
			
 
				-            - name: burn_in
			
 
				-              description: burn in period in which to initially calculate mean and stddev on every step.
			
 
				-              default_value: 2
			
 
				-              required: true
			
 
				-            - name: mode
			
 
				-              description: mode can be to get a zscore 'per_dim' or 'per_chart'.
			
 
				-              default_value: per_chart
			
 
				-              required: true
			
 
				-            - name: per_chart_agg
			
 
				-              description: per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'.
			
 
				-              default_value: mean
			
 
				-              required: true
			
 
				-            - name: update_every
			
 
				-              description: Sets the default data collection frequency.
			
 
				-              default_value: 5
			
 
				-              required: false
			
 
				-            - name: priority
			
 
				-              description: Controls the order of charts at the netdata dashboard.
			
 
				-              default_value: 60000
			
 
				-              required: false
			
 
				-            - name: autodetection_retry
			
 
				-              description: Sets the job re-check interval in seconds.
			
 
				-              default_value: 0
			
 
				-              required: false
			
 
				-            - name: penalty
			
 
				-              description: Indicates whether to apply penalty to update_every in case of failures.
			
 
				-              default_value: yes
			
 
				-              required: false
			
 
				-        examples:
			
 
				-          folding:
			
 
				-            enabled: true
			
 
				-            title: "Config"
			
 
				-          list:
			
 
				-            - name: Default
			
 
				-              description: Default configuration.
			
 
				-              folding:
			
 
				-                enabled: false
			
 
				-              config: |
			
 
				-                local:
			
 
				-                  name: 'local'
			
 
				-                  host: '127.0.0.1:19999'
			
 
				-                  charts_regex: 'system\..*'
			
 
				-                  charts_to_exclude: 'system.uptime'
			
 
				-                  train_secs: 14400
			
 
				-                  offset_secs: 300
			
 
				-                  train_every_n: 900
			
 
				-                  z_smooth_n: 15
			
 
				-                  z_clip: 10
			
 
				-                  z_abs: 'true'
			
 
				-                  burn_in: 2
			
 
				-                  mode: 'per_chart'
			
 
				-                  per_chart_agg: 'mean'
			
 
				-    troubleshooting:
			
 
				-      problems:
			
 
				-        list: []
			
 
				-    alerts: []
			
 
				-    metrics:
			
 
				-      folding:
			
 
				-        title: Metrics
			
 
				-        enabled: false
			
 
				-      description: ""
			
 
				-      availability: []
			
 
				-      scopes:
			
 
				-        - name: global
			
 
				-          description: "These metrics refer to the entire monitored application."
			
 
				-          labels: []
			
 
				-          metrics:
			
 
				-            - name: zscores.z
			
 
				-              description: Z Score
			
 
				-              unit: "z"
			
 
				-              chart_type: line
			
 
				-              dimensions:
			
 
				-                - name: a dimension per chart or dimension
			
 
				-            - name: zscores.3stddev
			
 
				-              description: Z Score >3
			
 
				-              unit: "count"
			
 
				-              chart_type: stacked
			
 
				-              dimensions:
			
 
				-                - name: a dimension per chart or dimension
			
--- a/src/collectors/python.d.plugin/zscores/zscores.chart.py
+++ b/src/collectors/python.d.plugin/zscores/zscores.chart.py
@@ -1,146 +0,0 @@
 
				-# -*- coding: utf-8 -*-
			
 
				-# Description: zscores netdata python.d module
			
 
				-# Author: andrewm4894
			
 
				-# SPDX-License-Identifier: GPL-3.0-or-later
			
 
				-
			
 
				-from datetime import datetime
			
 
				-import re
			
 
				-
			
 
				-import requests
			
 
				-import numpy as np
			
 
				-import pandas as pd
			
 
				-
			
 
				-from bases.FrameworkServices.SimpleService import SimpleService
			
 
				-from netdata_pandas.data import get_data, get_allmetrics
			
 
				-
			
 
				-priority = 60000
			
 
				-update_every = 5
			
 
				-disabled_by_default = True
			
 
				-
			
 
				-ORDER = [
			
 
				-    'z',
			
 
				-    '3stddev'
			
 
				-]
			
 
				-
			
 
				-CHARTS = {
			
 
				-    'z': {
			
 
				-        'options': ['z', 'Z Score', 'z', 'Z Score', 'zscores.z', 'line'],
			
 
				-        'lines': []
			
 
				-    },
			
 
				-    '3stddev': {
			
 
				-        'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', 'zscores.3stddev', 'stacked'],
			
 
				-        'lines': []
			
 
				-    },
			
 
				-}
			
 
				-
			
 
				-
			
 
				-class Service(SimpleService):
			
 
				-    def __init__(self, configuration=None, name=None):
			
 
				-        SimpleService.__init__(self, configuration=configuration, name=name)
			
 
				-        self.host = self.configuration.get('host', '127.0.0.1:19999')
			
 
				-        self.charts_regex = re.compile(self.configuration.get('charts_regex', 'system.*'))
			
 
				-        self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',')
			
 
				-        self.charts_in_scope = [
			
 
				-            c for c in
			
 
				-            list(filter(self.charts_regex.match,
			
 
				-                        requests.get(f'http://{self.host}/api/v1/charts').json()['charts'].keys()))
			
 
				-            if c not in self.charts_to_exclude
			
 
				-        ]
			
 
				-        self.train_secs = self.configuration.get('train_secs', 14400)
			
 
				-        self.offset_secs = self.configuration.get('offset_secs', 300)
			
 
				-        self.train_every_n = self.configuration.get('train_every_n', 900)
			
 
				-        self.z_smooth_n = self.configuration.get('z_smooth_n', 15)
			
 
				-        self.z_clip = self.configuration.get('z_clip', 10)
			
 
				-        self.z_abs = bool(self.configuration.get('z_abs', True))
			
 
				-        self.burn_in = self.configuration.get('burn_in', 2)
			
 
				-        self.mode = self.configuration.get('mode', 'per_chart')
			
 
				-        self.per_chart_agg = self.configuration.get('per_chart_agg', 'mean')
			
 
				-        self.order = ORDER
			
 
				-        self.definitions = CHARTS
			
 
				-        self.collected_dims = {'z': set(), '3stddev': set()}
			
 
				-        self.df_mean = pd.DataFrame()
			
 
				-        self.df_std = pd.DataFrame()
			
 
				-        self.df_z_history = pd.DataFrame()
			
 
				-
			
 
				-    def check(self):
			
 
				-        _ = get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.')
			
 
				-        return True
			
 
				-
			
 
				-    def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1):
			
 
				-        """If dimension not in chart then add it.
			
 
				-        """
			
 
				-        for dim in data:
			
 
				-            if dim not in self.collected_dims[chart]:
			
 
				-                self.collected_dims[chart].add(dim)
			
 
				-                self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor])
			
 
				-
			
 
				-        for dim in list(self.collected_dims[chart]):
			
 
				-            if dim not in data:
			
 
				-                self.collected_dims[chart].remove(dim)
			
 
				-                self.charts[chart].del_dimension(dim, hide=False)
			
 
				-
			
 
				-    def train_model(self):
			
 
				-        """Calculate the mean and stddev for all relevant metrics and store them for use in calulcating zscore at each timestep.
			
 
				-        """
			
 
				-        before = int(datetime.now().timestamp()) - self.offset_secs
			
 
				-        after = before - self.train_secs
			
 
				-
			
 
				-        self.df_mean = get_data(
			
 
				-            self.host, self.charts_in_scope, after, before, points=10, group='average', col_sep='.'
			
 
				-        ).mean().to_frame().rename(columns={0: "mean"})
			
 
				-
			
 
				-        self.df_std = get_data(
			
 
				-            self.host, self.charts_in_scope, after, before, points=10, group='stddev', col_sep='.'
			
 
				-        ).mean().to_frame().rename(columns={0: "std"})
			
 
				-
			
 
				-    def create_data(self, df_allmetrics):
			
 
				-        """Use x, mean, stddev to generate z scores and 3stddev flags via some pandas manipulation.
			
 
				-        Returning two dictionaries of dimensions and measures, one for each chart.
			
 
				-
			
 
				-        :param df_allmetrics <pd.DataFrame>: pandas dataframe with latest data from api/v1/allmetrics.
			
 
				-        :return: (<dict>,<dict>) tuple of dictionaries, one for  zscores and the other for a flag if abs(z)>3.
			
 
				-        """
			
 
				-        # calculate clipped z score for each available metric
			
 
				-        df_z = pd.concat([self.df_mean, self.df_std, df_allmetrics], axis=1, join='inner')
			
 
				-        df_z['z'] = ((df_z['value'] - df_z['mean']) / df_z['std']).clip(-self.z_clip, self.z_clip).fillna(0) * 100
			
 
				-        if self.z_abs:
			
 
				-            df_z['z'] = df_z['z'].abs()
			
 
				-
			
 
				-        # append last z_smooth_n rows of zscores to history table in wide format
			
 
				-        self.df_z_history = self.df_z_history.append(
			
 
				-            df_z[['z']].reset_index().pivot_table(values='z', columns='index'), sort=True
			
 
				-        ).tail(self.z_smooth_n)
			
 
				-
			
 
				-        # get average zscore for last z_smooth_n for each metric
			
 
				-        df_z_smooth = self.df_z_history.melt(value_name='z').groupby('index')['z'].mean().to_frame()
			
 
				-        df_z_smooth['3stddev'] = np.where(abs(df_z_smooth['z']) > 300, 1, 0)
			
 
				-        data_z = df_z_smooth['z'].add_suffix('_z').to_dict()
			
 
				-
			
 
				-        # aggregate to chart level if specified
			
 
				-        if self.mode == 'per_chart':
			
 
				-            df_z_smooth['chart'] = ['.'.join(x[0:2]) + '_z' for x in df_z_smooth.index.str.split('.').to_list()]
			
 
				-            if self.per_chart_agg == 'absmax':
			
 
				-                data_z = \
			
 
				-                list(df_z_smooth.groupby('chart').agg({'z': lambda x: max(x, key=abs)})['z'].to_dict().values())[0]
			
 
				-            else:
			
 
				-                data_z = list(df_z_smooth.groupby('chart').agg({'z': [self.per_chart_agg]})['z'].to_dict().values())[0]
			
 
				-
			
 
				-        data_3stddev = {}
			
 
				-        for k in data_z:
			
 
				-            data_3stddev[k.replace('_z', '')] = 1 if abs(data_z[k]) > 300 else 0
			
 
				-
			
 
				-        return data_z, data_3stddev
			
 
				-
			
 
				-    def get_data(self):
			
 
				-
			
 
				-        if self.runs_counter <= self.burn_in or self.runs_counter % self.train_every_n == 0:
			
 
				-            self.train_model()
			
 
				-
			
 
				-        data_z, data_3stddev = self.create_data(
			
 
				-            get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.').transpose())
			
 
				-        data = {**data_z, **data_3stddev}
			
 
				-
			
 
				-        self.validate_charts('z', data_z, divisor=100)
			
 
				-        self.validate_charts('3stddev', data_3stddev)
			
 
				-
			
 
				-        return data
			
--- a/src/collectors/python.d.plugin/zscores/zscores.conf
+++ b/src/collectors/python.d.plugin/zscores/zscores.conf
@@ -1,108 +0,0 @@
 
				-# netdata python.d.plugin configuration for example
			
 
				-#
			
 
				-# This file is in YaML format. Generally the format is:
			
 
				-#
			
 
				-# name: value
			
 
				-#
			
 
				-# There are 2 sections:
			
 
				-#  - global variables
			
 
				-#  - one or more JOBS
			
 
				-#
			
 
				-# JOBS allow you to collect values from multiple sources.
			
 
				-# Each source will have its own set of charts.
			
 
				-#
			
 
				-# JOB parameters have to be indented (using spaces only, example below).
			
 
				-
			
 
				-# ----------------------------------------------------------------------
			
 
				-# Global Variables
			
 
				-# These variables set the defaults for all JOBs, however each JOB
			
 
				-# may define its own, overriding the defaults.
			
 
				-
			
 
				-# update_every sets the default data collection frequency.
			
 
				-# If unset, the python.d.plugin default is used.
			
 
				-update_every: 5
			
 
				-
			
 
				-# priority controls the order of charts at the netdata dashboard.
			
 
				-# Lower numbers move the charts towards the top of the page.
			
 
				-# If unset, the default for python.d.plugin is used.
			
 
				-# priority: 60000
			
 
				-
			
 
				-# penalty indicates whether to apply penalty to update_every in case of failures.
			
 
				-# Penalty will increase every 5 failed updates in a row. Maximum penalty is 10 minutes.
			
 
				-# penalty: yes
			
 
				-
			
 
				-# autodetection_retry sets the job re-check interval in seconds.
			
 
				-# The job is not deleted if check fails.
			
 
				-# Attempts to start the job are made once every autodetection_retry.
			
 
				-# This feature is disabled by default.
			
 
				-# autodetection_retry: 0
			
 
				-
			
 
				-# ----------------------------------------------------------------------
			
 
				-# JOBS (data collection sources)
			
 
				-#
			
 
				-# The default JOBS share the same *name*. JOBS with the same name
			
 
				-# are mutually exclusive. Only one of them will be allowed running at
			
 
				-# any time. This allows autodetection to try several alternatives and
			
 
				-# pick the one that works.
			
 
				-#
			
 
				-# Any number of jobs is supported.
			
 
				-#
			
 
				-# All python.d.plugin JOBS (for all its modules) support a set of
			
 
				-# predefined parameters. These are:
			
 
				-#
			
 
				-# job_name:
			
 
				-#     name: myname            # the JOB's name as it will appear at the
			
 
				-#                             # dashboard (by default is the job_name)
			
 
				-#                             # JOBs sharing a name are mutually exclusive
			
 
				-#     update_every: 1         # the JOB's data collection frequency
			
 
				-#     priority: 60000         # the JOB's order on the dashboard
			
 
				-#     penalty: yes            # the JOB's penalty
			
 
				-#     autodetection_retry: 0  # the JOB's re-check interval in seconds
			
 
				-#
			
 
				-# Additionally to the above, example also supports the following:
			
 
				-#
			
 
				-# - none
			
 
				-#
			
 
				-# ----------------------------------------------------------------------
			
 
				-# AUTO-DETECTION JOBS
			
 
				-# only one of them will run (they have the same name)
			
 
				-
			
 
				-local:
			
 
				-    name: 'local'
			
 
				-
			
 
				-    # what host to pull data from
			
 
				-    host: '127.0.0.1:19999'
			
 
				-
			
 
				-    # what charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc.
			
 
				-    charts_regex: 'system\..*'
			
 
				-
			
 
				-    # Charts to exclude, useful if you would like to exclude some specific charts. 
			
 
				-    # Note: should be a ',' separated string like 'chart.name,chart.name'.
			
 
				-    charts_to_exclude: 'system.uptime'
			
 
				-
			
 
				-    # length of time to base calculations off for mean and stddev
			
 
				-    train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore
			
 
				-
			
 
				-    # offset preceding latest data to ignore when calculating mean and stddev
			
 
				-    offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev
			
 
				-
			
 
				-    # recalculate the mean and stddev every n steps of the collector
			
 
				-    train_every_n: 900 # recalculate mean and stddev every 15 minutes
			
 
				-
			
 
				-    # smooth the z score by averaging it over last n values
			
 
				-    z_smooth_n: 15 # take a rolling average of the last 15 zscore values to reduce sensitivity to temporary 'spikes'
			
 
				-
			
 
				-    # cap absolute value of zscore (before smoothing) for better stability
			
 
				-    z_clip: 10 # cap each zscore at 10 so as to avoid really large individual zscores swamping any rolling average
			
 
				-
			
 
				-    # set z_abs: 'true' to make all zscores be absolute values only.
			
 
				-    z_abs: 'true'
			
 
				-
			
 
				-    # burn in period in which to initially calculate mean and stddev on every step
			
 
				-    burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return
			
 
				-
			
 
				-    # mode can be to get a zscore 'per_dim' or 'per_chart'
			
 
				-    mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step
			
 
				-
			
 
				-    # per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'
			
 
				-    per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average.