sys_devices_system_edac_mc.c 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. #include "plugin_proc.h"
  3. struct mc {
  4. char *name;
  5. char ce_updated;
  6. char ue_updated;
  7. char *ce_count_filename;
  8. char *ue_count_filename;
  9. procfile *ce_ff;
  10. procfile *ue_ff;
  11. collected_number ce_count;
  12. collected_number ue_count;
  13. RRDDIM *ce_rd;
  14. RRDDIM *ue_rd;
  15. struct mc *next;
  16. };
  17. static struct mc *mc_root = NULL;
  18. static void find_all_mc() {
  19. char name[FILENAME_MAX + 1];
  20. snprintfz(name, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/sys/devices/system/edac/mc");
  21. char *dirname = config_get("plugin:proc:/sys/devices/system/edac/mc", "directory to monitor", name);
  22. DIR *dir = opendir(dirname);
  23. if(unlikely(!dir)) {
  24. error("Cannot read ECC memory errors directory '%s'", dirname);
  25. return;
  26. }
  27. struct dirent *de = NULL;
  28. while((de = readdir(dir))) {
  29. if(de->d_type == DT_DIR && de->d_name[0] == 'm' && de->d_name[1] == 'c' && isdigit(de->d_name[2])) {
  30. struct mc *m = callocz(1, sizeof(struct mc));
  31. m->name = strdupz(de->d_name);
  32. struct stat st;
  33. snprintfz(name, FILENAME_MAX, "%s/%s/ce_count", dirname, de->d_name);
  34. if(stat(name, &st) != -1)
  35. m->ce_count_filename = strdupz(name);
  36. snprintfz(name, FILENAME_MAX, "%s/%s/ue_count", dirname, de->d_name);
  37. if(stat(name, &st) != -1)
  38. m->ue_count_filename = strdupz(name);
  39. if(!m->ce_count_filename && !m->ue_count_filename) {
  40. freez(m->name);
  41. freez(m);
  42. }
  43. else {
  44. m->next = mc_root;
  45. mc_root = m;
  46. }
  47. }
  48. }
  49. closedir(dir);
  50. }
  51. int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt) {
  52. (void)dt;
  53. if(unlikely(mc_root == NULL)) {
  54. find_all_mc();
  55. if(unlikely(mc_root == NULL))
  56. return 1;
  57. }
  58. static int do_ce = -1, do_ue = -1;
  59. NETDATA_DOUBLE ce_sum = 0, ue_sum = 0;
  60. struct mc *m;
  61. if(unlikely(do_ce == -1)) {
  62. do_ce = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory correctable errors", CONFIG_BOOLEAN_AUTO);
  63. do_ue = config_get_boolean_ondemand("plugin:proc:/sys/devices/system/edac/mc", "enable ECC memory uncorrectable errors", CONFIG_BOOLEAN_AUTO);
  64. }
  65. if(do_ce != CONFIG_BOOLEAN_NO) {
  66. for(m = mc_root; m; m = m->next) {
  67. if(m->ce_count_filename) {
  68. m->ce_updated = 0;
  69. if(unlikely(!m->ce_ff)) {
  70. m->ce_ff = procfile_open(m->ce_count_filename, " \t", PROCFILE_FLAG_DEFAULT);
  71. if(unlikely(!m->ce_ff))
  72. continue;
  73. }
  74. m->ce_ff = procfile_readall(m->ce_ff);
  75. if(unlikely(!m->ce_ff || procfile_lines(m->ce_ff) < 1 || procfile_linewords(m->ce_ff, 0) < 1))
  76. continue;
  77. m->ce_count = str2ull(procfile_lineword(m->ce_ff, 0, 0));
  78. ce_sum += m->ce_count;
  79. m->ce_updated = 1;
  80. }
  81. }
  82. }
  83. if(do_ue != CONFIG_BOOLEAN_NO) {
  84. for(m = mc_root; m; m = m->next) {
  85. if(m->ue_count_filename) {
  86. m->ue_updated = 0;
  87. if(unlikely(!m->ue_ff)) {
  88. m->ue_ff = procfile_open(m->ue_count_filename, " \t", PROCFILE_FLAG_DEFAULT);
  89. if(unlikely(!m->ue_ff))
  90. continue;
  91. }
  92. m->ue_ff = procfile_readall(m->ue_ff);
  93. if(unlikely(!m->ue_ff || procfile_lines(m->ue_ff) < 1 || procfile_linewords(m->ue_ff, 0) < 1))
  94. continue;
  95. m->ue_count = str2ull(procfile_lineword(m->ue_ff, 0, 0));
  96. ue_sum += m->ue_count;
  97. m->ue_updated = 1;
  98. }
  99. }
  100. }
  101. // --------------------------------------------------------------------
  102. if(do_ce == CONFIG_BOOLEAN_YES || (do_ce == CONFIG_BOOLEAN_AUTO &&
  103. (ce_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
  104. do_ce = CONFIG_BOOLEAN_YES;
  105. static RRDSET *ce_st = NULL;
  106. if(unlikely(!ce_st)) {
  107. ce_st = rrdset_create_localhost(
  108. "mem"
  109. , "ecc_ce"
  110. , NULL
  111. , "ecc"
  112. , NULL
  113. , "ECC Memory Correctable Errors"
  114. , "errors"
  115. , PLUGIN_PROC_NAME
  116. , "/sys/devices/system/edac/mc"
  117. , NETDATA_CHART_PRIO_MEM_HW_ECC_CE
  118. , update_every
  119. , RRDSET_TYPE_LINE
  120. );
  121. }
  122. else
  123. rrdset_next(ce_st);
  124. for(m = mc_root; m; m = m->next) {
  125. if (m->ce_count_filename && m->ce_updated) {
  126. if(unlikely(!m->ce_rd))
  127. m->ce_rd = rrddim_add(ce_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  128. rrddim_set_by_pointer(ce_st, m->ce_rd, m->ce_count);
  129. }
  130. }
  131. rrdset_done(ce_st);
  132. }
  133. // --------------------------------------------------------------------
  134. if(do_ue == CONFIG_BOOLEAN_YES || (do_ue == CONFIG_BOOLEAN_AUTO &&
  135. (ue_sum > 0 || netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
  136. do_ue = CONFIG_BOOLEAN_YES;
  137. static RRDSET *ue_st = NULL;
  138. if(unlikely(!ue_st)) {
  139. ue_st = rrdset_create_localhost(
  140. "mem"
  141. , "ecc_ue"
  142. , NULL
  143. , "ecc"
  144. , NULL
  145. , "ECC Memory Uncorrectable Errors"
  146. , "errors"
  147. , PLUGIN_PROC_NAME
  148. , "/sys/devices/system/edac/mc"
  149. , NETDATA_CHART_PRIO_MEM_HW_ECC_UE
  150. , update_every
  151. , RRDSET_TYPE_LINE
  152. );
  153. }
  154. else
  155. rrdset_next(ue_st);
  156. for(m = mc_root; m; m = m->next) {
  157. if (m->ue_count_filename && m->ue_updated) {
  158. if(unlikely(!m->ue_rd))
  159. m->ue_rd = rrddim_add(ue_st, m->name, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
  160. rrddim_set_by_pointer(ue_st, m->ue_rd, m->ue_count);
  161. }
  162. }
  163. rrdset_done(ue_st);
  164. }
  165. return 0;
  166. }