Browse Source

improvement(go.d/x509check): support checking full chain expiry time (#19001)

Ilya Mashchenko 4 months ago
parent
commit
a2b6132a47

+ 42 - 20
src/go/plugin/go.d/modules/x509check/charts.go

@@ -2,42 +2,64 @@
 
 package x509check
 
-import "github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module"
+import (
+	"fmt"
+	"strconv"
 
-var (
-	baseCharts = module.Charts{
-		timeUntilExpirationChart.Copy(),
-	}
-	withRevocationCharts = module.Charts{
-		timeUntilExpirationChart.Copy(),
-		revocationStatusChart.Copy(),
-	}
+	"github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module"
+)
 
-	timeUntilExpirationChart = module.Chart{
-		ID:    "time_until_expiration",
+// certChartsTmpl is the set of per-certificate chart templates. addCertCharts
+// works on a copy of it, dropping the revocation chart when it is not needed.
+var certChartsTmpl = module.Charts{
+	certTimeUntilExpirationChartTmpl.Copy(),
+	certRevocationStatusChartTmpl.Copy(),
+}
+
+var (
+	// certTimeUntilExpirationChartTmpl is the template for the per-certificate
+	// expiration chart. The "%d" placeholder in the chart ID and dimension ID
+	// is replaced with the certificate depth by addCertCharts.
+	certTimeUntilExpirationChartTmpl = module.Chart{
+		ID:    "cert_depth%d_time_until_expiration",
 		Title: "Time Until Certificate Expiration",
 		Units: "seconds",
 		Fam:   "expiration time",
 		Ctx:   "x509check.time_until_expiration",
 		Opts:  module.Opts{StoreFirst: true},
 		Dims: module.Dims{
-			{ID: "expiry"},
-		},
-		Vars: module.Vars{
-			{ID: "days_until_expiration_warning"},
-			{ID: "days_until_expiration_critical"},
+			{ID: "cert_depth%d_expiry", Name: "expiry"},
 		},
 	}
-	revocationStatusChart = module.Chart{
-		ID:    "revocation_status",
+	// certRevocationStatusChartTmpl is the template for the revocation status
+	// chart. Its IDs carry the same "%d" depth placeholder; addCertCharts
+	// removes this chart for depth > 0 or when revocation checking is off.
+	certRevocationStatusChartTmpl = module.Chart{
+		ID:    "cert_depth%d_revocation_status",
 		Title: "Revocation Status",
 		Units: "boolean",
 		Fam:   "revocation",
 		Ctx:   "x509check.revocation_status",
 		Opts:  module.Opts{StoreFirst: true},
 		Dims: module.Dims{
-			{ID: "not_revoked"},
-			{ID: "revoked"},
+			{ID: "cert_depth%d_not_revoked", Name: "not_revoked"},
+			{ID: "cert_depth%d_revoked", Name: "revoked"},
 		},
 	}
 )
+
+// addCertCharts instantiates the chart templates for the certificate found at
+// the given chain depth and registers them with the module's chart set.
+//
+// The revocation chart is kept only when depth is 0 and revocation checking is
+// enabled. The "%d" placeholder in every chart and dimension ID is replaced
+// with depth, and each chart is labelled with the configured source, the
+// certificate common name and the depth. A failure to register the charts is
+// logged as a warning and otherwise ignored.
+func (x *X509Check) addCertCharts(commonName string, depth int) {
+	newCharts := certChartsTmpl.Copy()
+
+	keepRevocation := depth <= 0 && x.CheckRevocation
+	if !keepRevocation {
+		// Removal is keyed on the template ID, so it has to happen before
+		// the IDs are rewritten below.
+		_ = newCharts.Remove(certRevocationStatusChartTmpl.ID)
+	}
+
+	depthLabel := strconv.Itoa(depth)
+
+	for i := range *newCharts {
+		c := (*newCharts)[i]
+
+		c.ID = fmt.Sprintf(c.ID, depth)
+		for _, d := range c.Dims {
+			d.ID = fmt.Sprintf(d.ID, depth)
+		}
+
+		c.Labels = []module.Label{
+			{Key: "source", Value: x.Source},
+			{Key: "common_name", Value: commonName},
+			{Key: "depth", Value: depthLabel},
+		}
+	}
+
+	err := x.Charts().Add(*newCharts...)
+	if err != nil {
+		x.Warningf("failed to add charts for '%s': %v", commonName, err)
+	}
+}

+ 33 - 23
src/go/plugin/go.d/modules/x509check/collect.go

@@ -7,6 +7,8 @@ import (
 	"fmt"
 	"time"
 
+	"github.com/netdata/netdata/go/plugins/plugin/go.d/pkg/metrix"
+
 	"github.com/cloudflare/cfssl/revoke"
 )
 
@@ -22,37 +24,45 @@ func (x *X509Check) collect() (map[string]int64, error) {
 
 	mx := make(map[string]int64)
 
-	x.collectExpiration(mx, certs)
-	if x.CheckRevocation {
-		x.collectRevocation(mx, certs)
+	if err := x.collectCertificates(mx, certs); err != nil {
+		return nil, err
 	}
 
 	return mx, nil
 }
 
-func (x *X509Check) collectExpiration(mx map[string]int64, certs []*x509.Certificate) {
-	expiry := time.Until(certs[0].NotAfter).Seconds()
-	mx["expiry"] = int64(expiry)
-	mx["days_until_expiration_warning"] = x.DaysUntilWarn
-	mx["days_until_expiration_critical"] = x.DaysUntilCritical
+// collectCertificates walks the certificate chain and fills mx with per-certificate
+// metrics, keyed with the "cert_depth<N>_" prefix where N is the index in certs.
+//
+// For every processed certificate it records the seconds left until NotAfter and,
+// the first time a common name is seen, adds the charts for it. Revocation status
+// is checked only for the first certificate (index 0) and only when CheckRevocation
+// is set. Unless CheckFullChain is set, processing stops after the first certificate.
+//
+// The returned error is currently always nil.
+func (x *X509Check) collectCertificates(mx map[string]int64, certs []*x509.Certificate) error {
+	for i, cert := range certs {
+		cn := cert.Subject.CommonName
 
-}
+		if !x.seenCerts[cn] {
+			x.seenCerts[cn] = true
+			x.addCertCharts(cn, i)
+		}
 
-func (x *X509Check) collectRevocation(mx map[string]int64, certs []*x509.Certificate) {
-	rev, ok, err := revoke.VerifyCertificateError(certs[0])
-	if err != nil {
-		x.Debug(err)
-	}
-	if !ok {
-		return
-	}
+		px := fmt.Sprintf("cert_depth%d_", i)
+
+		expiry := int64(time.Until(cert.NotAfter).Seconds())
 
-	mx["revoked"] = 0
-	mx["not_revoked"] = 0
+		mx[px+"expiry"] = expiry
 
-	if rev {
-		mx["revoked"] = 1
-	} else {
-		mx["not_revoked"] = 1
+		if i == 0 && x.CheckRevocation {
+			// A failed or inconclusive lookup must only skip the revocation
+			// metrics. Using "continue" here would bypass the CheckFullChain
+			// guard below and process the rest of the chain even when the
+			// full-chain check is disabled.
+			rev, ok, err := revoke.VerifyCertificateError(cert)
+			if err != nil {
+				x.Debug(err)
+			} else if ok {
+				mx[px+"revoked"] = metrix.Bool(rev)
+				mx[px+"not_revoked"] = metrix.Bool(!rev)
+			}
+		}
+
+		if !x.CheckFullChain {
+			break
+		}
 	}
+
+	return nil
 }

+ 8 - 18
src/go/plugin/go.d/modules/x509check/config_schema.json

@@ -23,25 +23,16 @@
         "minimum": 0.5,
         "default": 1
       },
+      "check_full_chain": {
+        "title": "Full chain",
+        "description": "Monitor expiration time for all certificates in the SSL/TLS chain, including intermediate and root certificates.",
+        "type": "boolean"
+      },
       "check_revocation_status": {
-        "title": "Revocation status check",
+        "title": "Revocation status",
         "description": "Whether to check the revocation status of the certificate.",
         "type": "boolean"
       },
-      "days_until_expiration_warning": {
-        "title": "Days until warning",
-        "description": "Number of days before the alarm status is set to warning.",
-        "type": "integer",
-        "minimum": 1,
-        "default": 14
-      },
-      "days_until_expiration_critical": {
-        "title": "Days until critical",
-        "description": "Number of days before the alarm status is set to critical.",
-        "type": "integer",
-        "minimum": 1,
-        "default": 7
-      },
       "tls_skip_verify": {
         "title": "Skip TLS verification",
         "description": "If set, TLS certificate verification will be skipped.",
@@ -94,9 +85,8 @@
             "update_every",
             "source",
             "timeout",
-            "check_revocation_status",
-            "days_until_expiration_warning",
-            "days_until_expiration_critical"
+            "check_full_chain",
+            "check_revocation_status"
           ]
         },
         {

+ 0 - 20
src/go/plugin/go.d/modules/x509check/init.go

@@ -4,8 +4,6 @@ package x509check
 
 import (
 	"errors"
-
-	"github.com/netdata/netdata/go/plugins/plugin/go.d/agent/module"
 )
 
 func (x *X509Check) validateConfig() error {
@@ -18,21 +16,3 @@ func (x *X509Check) validateConfig() error {
+// initProvider creates the certificate provider for this job by passing the
+// job configuration to newProvider, and returns its result unchanged.
 func (x *X509Check) initProvider() (provider, error) {
 	return newProvider(x.Config)
 }
-
-func (x *X509Check) initCharts() *module.Charts {
-	var charts *module.Charts
-	if x.CheckRevocation {
-		charts = withRevocationCharts.Copy()
-	} else {
-		charts = baseCharts.Copy()
-	}
-
-	for _, chart := range *charts {
-		chart.Labels = []module.Label{
-			{Key: "source", Value: x.Source},
-		}
-	}
-
-	return charts
-
-}

+ 11 - 11
src/go/plugin/go.d/modules/x509check/metadata.yaml

@@ -62,13 +62,9 @@ modules:
               description: "Certificate source. Allowed schemes: https, tcp, tcp4, tcp6, udp, udp4, udp6, file, smtp."
               default_value: ""
               required: false
-            - name: days_until_expiration_warning
-              description: Number of days before the alarm status is warning.
-              default_value: 30
-              required: false
-            - name: days_until_expiration_critical
-              description: Number of days before the alarm status is critical.
-              default_value: 15
+            - name: check_full_chain
+              description: Monitor expiration time for all certificates in the SSL/TLS chain, including intermediate and root certificates.
+              default_value: false
               required: false
             - name: check_revocation_status
               description: Whether to check the revocation status of the certificate.
@@ -138,11 +134,11 @@ modules:
     alerts:
       - name: x509check_days_until_expiration
         metric: x509check.time_until_expiration
-        info: "Time until x509 certificate expires for ${label:source}"
+        info: "SSL cert expiring soon (${label:source} cn:${label:common_name})"
         link: https://github.com/netdata/netdata/blob/master/src/health/health.d/x509check.conf
       - name: x509check_revocation_status
         metric: x509check.revocation_status
-        info: "x509 certificate revocation status for ${label:source}"
+        info: "SSL cert revoked (${label:source})"
         link: https://github.com/netdata/netdata/blob/master/src/health/health.d/x509check.conf
     metrics:
       folding:
@@ -152,10 +148,14 @@ modules:
       availability: []
       scopes:
         - name: source
-          description: These metrics refer to the configured source.
+          description: These metrics refer to the SSL certificate.
           labels:
             - name: source
-              description: Configured source.
+              description: Same as the "source" configuration option.
+            - name: common_name
+              description: The common name (CN) extracted from the certificate.
+            - name: depth
+              description: The depth of the certificate within the certificate chain. The leaf certificate has a depth of 0, and subsequent certificates (intermediate certificates) have increasing depth values. The root certificate is at the highest depth.
           metrics:
             - name: x509check.time_until_expiration
               description: Time Until Certificate Expiration

+ 1 - 2
src/go/plugin/go.d/modules/x509check/testdata/config.json

@@ -6,7 +6,6 @@
   "tls_cert": "ok",
   "tls_key": "ok",
   "tls_skip_verify": true,
-  "days_until_expiration_warning": 123,
-  "days_until_expiration_critical": 123,
+  "check_full_chain": true,
   "check_revocation_status": true
 }

+ 1 - 2
src/go/plugin/go.d/modules/x509check/testdata/config.yaml

@@ -5,6 +5,5 @@ tls_ca: "ok"
 tls_cert: "ok"
 tls_key: "ok"
 tls_skip_verify: yes
-days_until_expiration_warning: 123
-days_until_expiration_critical: 123
+check_full_chain: yes
 check_revocation_status: yes

+ 13 - 12
src/go/plugin/go.d/modules/x509check/x509check.go

@@ -33,21 +33,22 @@ func init() {
+// New returns an X509Check with the default configuration (2 second timeout,
+// full-chain check disabled), an empty chart set and an empty seenCerts map.
 func New() *X509Check {
 	return &X509Check{
 		Config: Config{
-			Timeout:           confopt.Duration(time.Second * 2),
-			DaysUntilWarn:     14,
-			DaysUntilCritical: 7,
+			Timeout:        confopt.Duration(time.Second * 2),
+			CheckFullChain: false,
 		},
+
+		// Charts start empty; addCertCharts adds them per certificate.
+		charts:    &module.Charts{},
+		seenCerts: make(map[string]bool),
 	}
 }
 
+// Config holds the job settings of the x509check module as read from YAML/JSON.
 type Config struct {
-	UpdateEvery       int              `yaml:"update_every,omitempty" json:"update_every"`
-	Source            string           `yaml:"source" json:"source"`
-	Timeout           confopt.Duration `yaml:"timeout,omitempty" json:"timeout"`
-	DaysUntilWarn     int64            `yaml:"days_until_expiration_warning,omitempty" json:"days_until_expiration_warning"`
-	DaysUntilCritical int64            `yaml:"days_until_expiration_critical,omitempty" json:"days_until_expiration_critical"`
-	CheckRevocation   bool             `yaml:"check_revocation_status" json:"check_revocation_status"`
-	tlscfg.TLSConfig  `yaml:",inline" json:""`
+	UpdateEvery      int              `yaml:"update_every,omitempty" json:"update_every"`
+	Source           string           `yaml:"source" json:"source"`
+	Timeout          confopt.Duration `yaml:"timeout,omitempty" json:"timeout"`
+	// CheckFullChain makes collection continue past the first certificate so
+	// that every certificate in the chain gets its own expiration metric.
+	CheckFullChain   bool             `yaml:"check_full_chain" json:"check_full_chain"`
+	// CheckRevocation enables the revocation status check, which is performed
+	// for the first certificate in the chain only.
+	CheckRevocation  bool             `yaml:"check_revocation_status" json:"check_revocation_status"`
+	tlscfg.TLSConfig `yaml:",inline" json:""`
 }
 
 type X509Check struct {
@@ -57,6 +58,8 @@ type X509Check struct {
 	charts *module.Charts
 
 	prov provider
+
+	seenCerts map[string]bool
 }
 
 func (x *X509Check) Configuration() any {
@@ -74,8 +77,6 @@ func (x *X509Check) Init() error {
 	}
 	x.prov = prov
 
-	x.charts = x.initCharts()
-
 	return nil
 }
 

+ 6 - 6
src/health/health.d/x509check.conf

@@ -7,10 +7,10 @@ component: x509 certificates
      calc: $expiry / 86400
     units: days
     every: 60s
-     warn: $this < $days_until_expiration_warning
-     crit: $this < $days_until_expiration_critical
-  summary: x509 certificate expiration for ${label:source}
-     info: Time until x509 certificate expires for ${label:source}
+     warn: $this < 14
+     crit: $this < 7
+  summary: SSL cert expiring soon (${label:source} cn:${label:common_name})
+     info: SSL cert expiring soon (${label:source} cn:${label:common_name})
        to: webmaster
 
  template: x509check_revocation_status
@@ -22,6 +22,6 @@ component: x509 certificates
     units: status
     every: 60s
      crit: $this == 1
-  summary: x509 certificate revocation status for ${label:source}
-     info: x509 certificate revocation status for ${label:source}
+  summary: SSL cert revoked (${label:source})
+     info: SSL cert revoked (${label:source})
        to: webmaster