123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340 |
- // SPDX-License-Identifier: GPL-3.0-or-later
- #include "plugin_proc.h"
/* Directory scanned for PCI AER files; assigned from configuration by find_all_pci_aer(). */
static char *pci_aer_dirname = NULL;
/*
 * Kinds of AER sysfs files this collector understands.
 *
 * Each value is a distinct bit, so several kinds can be OR-ed together into a
 * mask of "types to look for" (see recursively_find_pci_aer()).
 */
typedef enum __attribute__((packed)) {
    AER_DEV_NONFATAL             = 0x01, // file "aer_dev_nonfatal"
    AER_DEV_CORRECTABLE          = 0x02, // file "aer_dev_correctable"
    AER_DEV_FATAL                = 0x04, // file "aer_dev_fatal"
    AER_ROOTPORT_TOTAL_ERR_COR   = 0x08, // file "aer_rootport_total_err_cor"
    AER_ROOTPORT_TOTAL_ERR_FATAL = 0x10, // file "aer_rootport_total_err_fatal"
} AER_TYPE;
- struct aer_value {
- kernel_uint_t count;
- RRDDIM *rd;
- };
- struct aer_entry {
- bool updated;
- STRING *name;
- AER_TYPE type;
- procfile *ff;
- DICTIONARY *values;
- RRDSET *st;
- };
- DICTIONARY *aer_root = NULL;
- static bool aer_value_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data __maybe_unused) {
- struct aer_value *v = old_value;
- struct aer_value *nv = new_value;
- v->count = nv->count;
- return false;
- }
- static void aer_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) {
- struct aer_entry *a = value;
- a->values = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE);
- dictionary_register_conflict_callback(a->values, aer_value_conflict_callback, NULL);
- }
- static void add_pci_aer(const char *base_dir, const char *d_name, AER_TYPE type) {
- char buffer[FILENAME_MAX + 1];
- snprintfz(buffer, FILENAME_MAX, "%s/%s", base_dir, d_name);
- struct aer_entry *a = dictionary_set(aer_root, buffer, NULL, sizeof(struct aer_entry));
- if(!a->name)
- a->name = string_strdupz(d_name);
- a->type = type;
- }
- static bool recursively_find_pci_aer(AER_TYPE types, const char *base_dir, const char *d_name, int depth) {
- if(depth > 100)
- return false;
- char buffer[FILENAME_MAX + 1];
- snprintfz(buffer, FILENAME_MAX, "%s/%s", base_dir, d_name);
- DIR *dir = opendir(buffer);
- if(unlikely(!dir)) {
- collector_error("Cannot read PCI_AER directory '%s'", buffer);
- return true;
- }
- struct dirent *de = NULL;
- while((de = readdir(dir))) {
- if(de->d_type == DT_DIR) {
- if(de->d_name[0] == '.')
- continue;
- recursively_find_pci_aer(types, buffer, de->d_name, depth + 1);
- }
- else if(de->d_type == DT_REG) {
- if((types & AER_DEV_NONFATAL) && strcmp(de->d_name, "aer_dev_nonfatal") == 0) {
- add_pci_aer(buffer, de->d_name, AER_DEV_NONFATAL);
- }
- else if((types & AER_DEV_CORRECTABLE) && strcmp(de->d_name, "aer_dev_correctable") == 0) {
- add_pci_aer(buffer, de->d_name, AER_DEV_CORRECTABLE);
- }
- else if((types & AER_DEV_FATAL) && strcmp(de->d_name, "aer_dev_fatal") == 0) {
- add_pci_aer(buffer, de->d_name, AER_DEV_FATAL);
- }
- else if((types & AER_ROOTPORT_TOTAL_ERR_COR) && strcmp(de->d_name, "aer_rootport_total_err_cor") == 0) {
- add_pci_aer(buffer, de->d_name, AER_ROOTPORT_TOTAL_ERR_COR);
- }
- else if((types & AER_ROOTPORT_TOTAL_ERR_FATAL) && strcmp(de->d_name, "aer_rootport_total_err_fatal") == 0) {
- add_pci_aer(buffer, de->d_name, AER_ROOTPORT_TOTAL_ERR_FATAL);
- }
- }
- }
- closedir(dir);
- return true;
- }
- static void find_all_pci_aer(AER_TYPE types) {
- char name[FILENAME_MAX + 1];
- snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices");
- pci_aer_dirname = config_get("plugin:proc:/sys/devices/pci/aer", "directory to monitor", name);
- DIR *dir = opendir(pci_aer_dirname);
- if(unlikely(!dir)) {
- collector_error("Cannot read PCI_AER directory '%s'", pci_aer_dirname);
- return;
- }
- struct dirent *de = NULL;
- while((de = readdir(dir))) {
- if(de->d_type == DT_DIR && de->d_name[0] == 'p' && de->d_name[1] == 'c' && de->d_name[2] == 'i' && isdigit(de->d_name[3]))
- recursively_find_pci_aer(types, pci_aer_dirname, de->d_name, 1);
- }
- closedir(dir);
- }
- static void read_pci_aer_values(const char *filename, struct aer_entry *t) {
- t->updated = false;
- if(unlikely(!t->ff)) {
- t->ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
- if(unlikely(!t->ff))
- return;
- }
- t->ff = procfile_readall(t->ff);
- if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1))
- return;
- size_t lines = procfile_lines(t->ff);
- for(size_t l = 0; l < lines ; l++) {
- if(procfile_linewords(t->ff, l) != 2)
- continue;
- struct aer_value v = {
- .count = str2ull(procfile_lineword(t->ff, l, 1), NULL)
- };
- char *key = procfile_lineword(t->ff, l, 0);
- if(!key || !*key || (key[0] == 'T' && key[1] == 'O' && key[2] == 'T' && key[3] == 'A' && key[4] == 'L' && key[5] == '_'))
- continue;
- dictionary_set(t->values, key, &v, sizeof(v));
- }
- t->updated = true;
- }
- static void read_pci_aer_count(const char *filename, struct aer_entry *t) {
- t->updated = false;
- if(unlikely(!t->ff)) {
- t->ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT);
- if(unlikely(!t->ff))
- return;
- }
- t->ff = procfile_readall(t->ff);
- if(unlikely(!t->ff || procfile_lines(t->ff) < 1 || procfile_linewords(t->ff, 0) < 1))
- return;
- struct aer_value v = {
- .count = str2ull(procfile_lineword(t->ff, 0, 0), NULL)
- };
- dictionary_set(t->values, "count", &v, sizeof(v));
- t->updated = true;
- }
- static void add_label_from_link(struct aer_entry *a, const char *path, const char *link) {
- char name[FILENAME_MAX + 1];
- strncpyz(name, path, FILENAME_MAX);
- char *slash = strrchr(name, '/');
- if(slash)
- *slash = '\0';
- char name2[FILENAME_MAX + 1];
- snprintfz(name2, FILENAME_MAX, "%s/%s", name, link);
- ssize_t len = readlink(name2, name, FILENAME_MAX);
- if(len != -1) {
- name[len] = '\0'; // Null-terminate the string
- slash = strrchr(name, '/');
- if(slash) slash++;
- else slash = name;
- rrdlabels_add(a->st->rrdlabels, link, slash, RRDLABEL_SRC_AUTO);
- }
- }
- int do_proc_sys_devices_pci_aer(int update_every, usec_t dt __maybe_unused) {
- if(unlikely(!aer_root)) {
- int do_root_ports = CONFIG_BOOLEAN_AUTO;
- int do_pci_slots = CONFIG_BOOLEAN_NO;
- char buffer[100 + 1] = "";
- rrdlabels_get_value_strcpyz(localhost->rrdlabels, buffer, 100, "_virtualization");
- if(strcmp(buffer, "none") != 0) {
- // no need to run on virtualized environments
- do_root_ports = CONFIG_BOOLEAN_NO;
- do_pci_slots = CONFIG_BOOLEAN_NO;
- }
- do_root_ports = config_get_boolean("plugin:proc:/sys/class/pci/aer", "enable root ports", do_root_ports);
- do_pci_slots = config_get_boolean("plugin:proc:/sys/class/pci/aer", "enable pci slots", do_pci_slots);
- if(!do_root_ports && !do_pci_slots)
- return 1;
- aer_root = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);
- dictionary_register_insert_callback(aer_root, aer_insert_callback, NULL);
- AER_TYPE types = ((do_root_ports) ? (AER_ROOTPORT_TOTAL_ERR_COR|AER_ROOTPORT_TOTAL_ERR_FATAL) : 0) |
- ((do_pci_slots) ? (AER_DEV_FATAL|AER_DEV_NONFATAL|AER_DEV_CORRECTABLE) : 0);
- find_all_pci_aer(types);
- if(!dictionary_entries(aer_root))
- return 1;
- }
- struct aer_entry *a;
- dfe_start_read(aer_root, a) {
- switch(a->type) {
- case AER_DEV_NONFATAL:
- case AER_DEV_FATAL:
- case AER_DEV_CORRECTABLE:
- read_pci_aer_values(a_dfe.name, a);
- break;
- case AER_ROOTPORT_TOTAL_ERR_COR:
- case AER_ROOTPORT_TOTAL_ERR_FATAL:
- read_pci_aer_count(a_dfe.name, a);
- break;
- }
- if(!a->updated)
- continue;
- if(!a->st) {
- const char *title;
- const char *context;
- switch(a->type) {
- case AER_DEV_NONFATAL:
- title = "PCI Advanced Error Reporting (AER) Non-Fatal Errors";
- context = "pci.aer_nonfatal";
- break;
- case AER_DEV_FATAL:
- title = "PCI Advanced Error Reporting (AER) Fatal Errors";
- context = "pci.aer_fatal";
- break;
- case AER_DEV_CORRECTABLE:
- title = "PCI Advanced Error Reporting (AER) Correctable Errors";
- context = "pci.aer_correctable";
- break;
- case AER_ROOTPORT_TOTAL_ERR_COR:
- title = "PCI Root-Port Advanced Error Reporting (AER) Correctable Errors";
- context = "pci.rootport_aer_correctable";
- break;
- case AER_ROOTPORT_TOTAL_ERR_FATAL:
- title = "PCI Root-Port Advanced Error Reporting (AER) Fatal Errors";
- context = "pci.rootport_aer_fatal";
- break;
- default:
- title = "Unknown PCI Advanced Error Reporting";
- context = "pci.unknown_aer";
- break;
- }
- char id[RRD_ID_LENGTH_MAX + 1];
- char nm[RRD_ID_LENGTH_MAX + 1];
- size_t len = strlen(pci_aer_dirname);
- const char *fname = a_dfe.name;
- if(strncmp(a_dfe.name, pci_aer_dirname, len) == 0)
- fname = &a_dfe.name[len];
- if(*fname == '/')
- fname++;
- snprintfz(id, RRD_ID_LENGTH_MAX, "%s_%s", &context[4], fname);
- char *slash = strrchr(id, '/');
- if(slash)
- *slash = '\0';
- netdata_fix_chart_id(id);
- snprintfz(nm, RRD_ID_LENGTH_MAX, "%s", fname);
- slash = strrchr(nm, '/');
- if(slash)
- *slash = '\0';
- a->st = rrdset_create_localhost(
- "pci"
- , id
- , NULL
- , "aer"
- , context
- , title
- , "errors/s"
- , PLUGIN_PROC_NAME
- , "/sys/devices/pci/aer"
- , NETDATA_CHART_PRIO_PCI_AER
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdlabels_add(a->st->rrdlabels, "device", nm, RRDLABEL_SRC_AUTO);
- add_label_from_link(a, a_dfe.name, "driver");
- struct aer_value *v;
- dfe_start_read(a->values, v) {
- v->rd = rrddim_add(a->st, v_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
- dfe_done(v);
- }
- struct aer_value *v;
- dfe_start_read(a->values, v) {
- if(unlikely(!v->rd))
- v->rd = rrddim_add(a->st, v_dfe.name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rrddim_set_by_pointer(a->st, v->rd, (collected_number)v->count);
- }
- dfe_done(v);
- rrdset_done(a->st);
- }
- dfe_done(a);
- return 0;
- }
|