freeipmi_plugin.c 73 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. /*
  3. * netdata freeipmi.plugin
  4. * Copyright (C) 2023 Netdata Inc.
  5. * GPL v3+
  6. *
  7. * Based on:
  8. * ipmimonitoring-sensors.c,v 1.51 2016/11/02 23:46:24 chu11 Exp
  9. * ipmimonitoring-sel.c,v 1.51 2016/11/02 23:46:24 chu11 Exp
  10. *
  11. * Copyright (C) 2007-2015 Lawrence Livermore National Security, LLC.
  12. * Copyright (C) 2006-2007 The Regents of the University of California.
  13. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  14. * Written by Albert Chu <chu11@llnl.gov>
  15. * UCRL-CODE-222073
  16. */
  17. // ----------------------------------------------------------------------------
  18. // BEGIN NETDATA CODE
  19. // #define NETDATA_TIMING_REPORT 1
  20. #include "libnetdata/libnetdata.h"
  21. #include "libnetdata/required_dummies.h"
  // Component names attached to sensors, based on our patterns.
  #define NETDATA_SENSOR_COMPONENT_MEMORY_MODULE  "Memory Module"
  #define NETDATA_SENSOR_COMPONENT_MEMORY         "Memory"
  #define NETDATA_SENSOR_COMPONENT_PROCESSOR      "Processor"
  #define NETDATA_SENSOR_COMPONENT_IPU            "Image Processor"
  #define NETDATA_SENSOR_COMPONENT_STORAGE        "Storage"
  #define NETDATA_SENSOR_COMPONENT_MOTHERBOARD    "Motherboard"
  #define NETDATA_SENSOR_COMPONENT_NETWORK        "Network"
  #define NETDATA_SENSOR_COMPONENT_POWER_SUPPLY   "Power Supply"
  #define NETDATA_SENSOR_COMPONENT_SYSTEM         "System"
  #define NETDATA_SENSOR_COMPONENT_PERIPHERAL     "Peripheral"

  // Netdata plugin defaults.

  // the max size of the key for the dictionary of sensors
  #define SENSORS_DICT_KEY_SIZE 2048

  // how many times to repeat data collection to decide latency
  #define SPEED_TEST_ITERATIONS 5

  // the priority of the sensors charts on the dashboard
  #define IPMI_SENSORS_DASHBOARD_PRIORITY 90000

  // the priority of the SEL events chart on the dashboard
  #define IPMI_SEL_DASHBOARD_PRIORITY 99000

  // the minimum data collection frequency for sensors
  #define IPMI_SENSORS_MIN_UPDATE_EVERY 5

  // the minimum data collection frequency for SEL events
  #define IPMI_SEL_MIN_UPDATE_EVERY 30

  // true/false, to enable/disable SEL by default
  #define IPMI_ENABLE_SEL_BY_DEFAULT true

  // restart the plugin every this many seconds;
  // this is to prevent possible bugs/leaks in ipmi libraries
  #define IPMI_RESTART_EVERY_SECONDS 14400

  // stale data collection detection time
  #define IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS (10 * 60)
  // Forward declarations of the state structure and of the two static
  // update functions, so the collection routines below can refer to them
  // before their definitions.
  struct netdata_ipmi_state;

  // Receives the values read for a single sensor.
  static void netdata_update_ipmi_sensor_reading(
      int record_id,
      int sensor_number,
      int sensor_type,
      int sensor_state,
      int sensor_units,
      int sensor_reading_type,
      char *sensor_name,
      void *sensor_reading,
      int event_reading_type_code,
      int sensor_bitmask_type,
      int sensor_bitmask,
      char **sensor_bitmask_strings,
      struct netdata_ipmi_state *state);

  // Receives the number of SEL events.
  static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events);
  62. // END NETDATA CODE
  63. // ----------------------------------------------------------------------------
  64. #include <stdio.h>
  65. #include <stdlib.h>
  66. #include <stdint.h>
  67. #include <string.h>
  68. #include <assert.h>
  69. #include <errno.h>
  70. #include <unistd.h>
  71. #include <sys/time.h>
  72. #include <ipmi_monitoring.h>
  73. #include <ipmi_monitoring_bitmasks.h>
  74. #include <ipmi_monitoring_offsets.h>
  75. /* Communication Configuration - Initialize accordingly */
  76. /* Hostname, NULL for In-band communication, non-null for a hostname */
  77. char *hostname = NULL;
  78. /* In-band Communication Configuration */
  79. int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS, etc. or -1 for default
  80. int disable_auto_probe = 0; /* probe for in-band device */
  81. unsigned int driver_address = 0; /* not used if probing */
  82. unsigned int register_spacing = 0; /* not used if probing */
  83. char *driver_device = NULL; /* not used if probing */
  84. /* Out-of-band Communication Configuration */
  85. int protocol_version = -1; // IPMI_MONITORING_PROTOCOL_VERSION_1_5, etc. or -1 for default
  86. char *username = "";
  87. char *password = "";
  88. unsigned char *k_g = NULL;
  89. unsigned int k_g_len = 0;
  90. int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER, etc. or -1 for default
  91. int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5, etc. or -1 for default
  92. int cipher_suite_id = -1; /* 0 or -1 for default */
  93. int session_timeout = 0; /* 0 for default */
  94. int retransmission_timeout = 0; /* 0 for default */
  95. /* Workarounds - specify workaround flags if necessary */
  96. unsigned int workaround_flags = 0;
  97. /* Set to an appropriate alternate if desired */
  98. char *sdr_cache_directory = "/tmp";
  99. char *sdr_sensors_cache_format = ".netdata-freeipmi-sensors-%H-on-%L.sdr";
  100. char *sdr_sel_cache_format = ".netdata-freeipmi-sel-%H-on-%L.sdr";
  101. char *sensor_config_file = NULL;
  102. char *sel_config_file = NULL;
  103. // controlled via command line options
  104. unsigned int global_sel_flags = IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE;
  105. unsigned int global_sensor_reading_flags = IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING|IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE;
  106. bool remove_reread_sdr_after_first_use = true;
  107. /* Initialization flags
  108. *
  109. * Most commonly bitwise OR IPMI_MONITORING_FLAGS_DEBUG and/or
  110. * IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS for extra debugging
  111. * information.
  112. */
  113. unsigned int ipmimonitoring_init_flags = 0;
  114. // ----------------------------------------------------------------------------
  115. // functions common to sensors and SEL
  116. static void initialize_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) {
  117. fatal_assert(ipmi_config);
  118. ipmi_config->driver_type = driver_type;
  119. ipmi_config->disable_auto_probe = disable_auto_probe;
  120. ipmi_config->driver_address = driver_address;
  121. ipmi_config->register_spacing = register_spacing;
  122. ipmi_config->driver_device = driver_device;
  123. ipmi_config->protocol_version = protocol_version;
  124. ipmi_config->username = username;
  125. ipmi_config->password = password;
  126. ipmi_config->k_g = k_g;
  127. ipmi_config->k_g_len = k_g_len;
  128. ipmi_config->privilege_level = privilege_level;
  129. ipmi_config->authentication_type = authentication_type;
  130. ipmi_config->cipher_suite_id = cipher_suite_id;
  131. ipmi_config->session_timeout_len = session_timeout;
  132. ipmi_config->retransmission_timeout_len = retransmission_timeout;
  133. ipmi_config->workaround_flags = workaround_flags;
  134. }
  135. static const char *netdata_ipmi_get_sensor_type_string (int sensor_type, const char **component) {
  136. switch (sensor_type) {
  137. case IPMI_MONITORING_SENSOR_TYPE_RESERVED:
  138. return ("Reserved");
  139. case IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE:
  140. return ("Temperature");
  141. case IPMI_MONITORING_SENSOR_TYPE_VOLTAGE:
  142. return ("Voltage");
  143. case IPMI_MONITORING_SENSOR_TYPE_CURRENT:
  144. return ("Current");
  145. case IPMI_MONITORING_SENSOR_TYPE_FAN:
  146. return ("Fan");
  147. case IPMI_MONITORING_SENSOR_TYPE_PHYSICAL_SECURITY:
  148. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  149. return ("Physical Security");
  150. case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_SECURITY_VIOLATION_ATTEMPT:
  151. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  152. return ("Platform Security Violation Attempt");
  153. case IPMI_MONITORING_SENSOR_TYPE_PROCESSOR:
  154. *component = NETDATA_SENSOR_COMPONENT_PROCESSOR;
  155. return ("Processor");
  156. case IPMI_MONITORING_SENSOR_TYPE_POWER_SUPPLY:
  157. *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY;
  158. return ("Power Supply");
  159. case IPMI_MONITORING_SENSOR_TYPE_POWER_UNIT:
  160. *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY;
  161. return ("Power Unit");
  162. case IPMI_MONITORING_SENSOR_TYPE_COOLING_DEVICE:
  163. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  164. return ("Cooling Device");
  165. case IPMI_MONITORING_SENSOR_TYPE_OTHER_UNITS_BASED_SENSOR:
  166. return ("Other Units Based Sensor");
  167. case IPMI_MONITORING_SENSOR_TYPE_MEMORY:
  168. *component = NETDATA_SENSOR_COMPONENT_MEMORY;
  169. return ("Memory");
  170. case IPMI_MONITORING_SENSOR_TYPE_DRIVE_SLOT:
  171. *component = NETDATA_SENSOR_COMPONENT_STORAGE;
  172. return ("Drive Slot");
  173. case IPMI_MONITORING_SENSOR_TYPE_POST_MEMORY_RESIZE:
  174. *component = NETDATA_SENSOR_COMPONENT_MEMORY;
  175. return ("POST Memory Resize");
  176. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_FIRMWARE_PROGRESS:
  177. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  178. return ("System Firmware Progress");
  179. case IPMI_MONITORING_SENSOR_TYPE_EVENT_LOGGING_DISABLED:
  180. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  181. return ("Event Logging Disabled");
  182. case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG1:
  183. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  184. return ("Watchdog 1");
  185. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_EVENT:
  186. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  187. return ("System Event");
  188. case IPMI_MONITORING_SENSOR_TYPE_CRITICAL_INTERRUPT:
  189. return ("Critical Interrupt");
  190. case IPMI_MONITORING_SENSOR_TYPE_BUTTON_SWITCH:
  191. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  192. return ("Button/Switch");
  193. case IPMI_MONITORING_SENSOR_TYPE_MODULE_BOARD:
  194. return ("Module/Board");
  195. case IPMI_MONITORING_SENSOR_TYPE_MICROCONTROLLER_COPROCESSOR:
  196. *component = NETDATA_SENSOR_COMPONENT_PROCESSOR;
  197. return ("Microcontroller/Coprocessor");
  198. case IPMI_MONITORING_SENSOR_TYPE_ADD_IN_CARD:
  199. return ("Add In Card");
  200. case IPMI_MONITORING_SENSOR_TYPE_CHASSIS:
  201. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  202. return ("Chassis");
  203. case IPMI_MONITORING_SENSOR_TYPE_CHIP_SET:
  204. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  205. return ("Chip Set");
  206. case IPMI_MONITORING_SENSOR_TYPE_OTHER_FRU:
  207. return ("Other Fru");
  208. case IPMI_MONITORING_SENSOR_TYPE_CABLE_INTERCONNECT:
  209. return ("Cable/Interconnect");
  210. case IPMI_MONITORING_SENSOR_TYPE_TERMINATOR:
  211. return ("Terminator");
  212. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_BOOT_INITIATED:
  213. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  214. return ("System Boot Initiated");
  215. case IPMI_MONITORING_SENSOR_TYPE_BOOT_ERROR:
  216. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  217. return ("Boot Error");
  218. case IPMI_MONITORING_SENSOR_TYPE_OS_BOOT:
  219. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  220. return ("OS Boot");
  221. case IPMI_MONITORING_SENSOR_TYPE_OS_CRITICAL_STOP:
  222. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  223. return ("OS Critical Stop");
  224. case IPMI_MONITORING_SENSOR_TYPE_SLOT_CONNECTOR:
  225. return ("Slot/Connector");
  226. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_ACPI_POWER_STATE:
  227. return ("System ACPI Power State");
  228. case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG2:
  229. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  230. return ("Watchdog 2");
  231. case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_ALERT:
  232. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  233. return ("Platform Alert");
  234. case IPMI_MONITORING_SENSOR_TYPE_ENTITY_PRESENCE:
  235. return ("Entity Presence");
  236. case IPMI_MONITORING_SENSOR_TYPE_MONITOR_ASIC_IC:
  237. return ("Monitor ASIC/IC");
  238. case IPMI_MONITORING_SENSOR_TYPE_LAN:
  239. *component = NETDATA_SENSOR_COMPONENT_NETWORK;
  240. return ("LAN");
  241. case IPMI_MONITORING_SENSOR_TYPE_MANAGEMENT_SUBSYSTEM_HEALTH:
  242. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  243. return ("Management Subsystem Health");
  244. case IPMI_MONITORING_SENSOR_TYPE_BATTERY:
  245. return ("Battery");
  246. case IPMI_MONITORING_SENSOR_TYPE_SESSION_AUDIT:
  247. return ("Session Audit");
  248. case IPMI_MONITORING_SENSOR_TYPE_VERSION_CHANGE:
  249. return ("Version Change");
  250. case IPMI_MONITORING_SENSOR_TYPE_FRU_STATE:
  251. return ("FRU State");
  252. case IPMI_MONITORING_SENSOR_TYPE_UNKNOWN:
  253. return ("Unknown");
  254. default:
  255. if(sensor_type >= IPMI_MONITORING_SENSOR_TYPE_OEM_MIN && sensor_type <= IPMI_MONITORING_SENSOR_TYPE_OEM_MAX)
  256. return ("OEM");
  257. return ("Unrecognized");
  258. }
  259. }
  // Helper macros for reading one value of the current sensor.
  //
  // Each of them stores func(ctx) into var and then records a timing step
  // named TIMING_STEP_FREEIPMI_READ_<var>. The checking variants log an error
  // and jump to the `cleanup` label of the calling function on failure, so
  // they can only be used inside functions that define that label.

  // Integer reads: a negative result is a failure.
  #define netdata_ipmi_get_value_int(var, func, ctx)                          \
      do {                                                                    \
          if (((var) = func(ctx)) < 0) {                                      \
              collector_error("%s(): call to " #func " failed: %s",           \
                  __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx));           \
              goto cleanup;                                                   \
          }                                                                   \
          timing_step(TIMING_STEP_FREEIPMI_READ_ ## var);                     \
      } while(0)

  // Pointer reads: a NULL result is a failure.
  #define netdata_ipmi_get_value_ptr(var, func, ctx)                          \
      do {                                                                    \
          if (!((var) = func(ctx))) {                                         \
              collector_error("%s(): call to " #func " failed: %s",           \
                  __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx));           \
              goto cleanup;                                                   \
          }                                                                   \
          timing_step(TIMING_STEP_FREEIPMI_READ_ ## var);                     \
      } while(0)

  // Unchecked reads: whatever func returns is accepted.
  #define netdata_ipmi_get_value_no_check(var, func, ctx)                     \
      do {                                                                    \
          (var) = func(ctx);                                                  \
          timing_step(TIMING_STEP_FREEIPMI_READ_ ## var);                     \
      } while(0)
  282. static int netdata_read_ipmi_sensors(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) {
  283. timing_init();
  284. ipmi_monitoring_ctx_t ctx = NULL;
  285. unsigned int sensor_reading_flags = global_sensor_reading_flags;
  286. int i;
  287. int sensor_count;
  288. int rv = -1;
  289. if (!(ctx = ipmi_monitoring_ctx_create ())) {
  290. collector_error("ipmi_monitoring_ctx_create()");
  291. goto cleanup;
  292. }
  293. timing_step(TIMING_STEP_FREEIPMI_CTX_CREATE);
  294. if (sdr_cache_directory) {
  295. if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) {
  296. collector_error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  297. goto cleanup;
  298. }
  299. }
  300. if (sdr_sensors_cache_format) {
  301. if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sensors_cache_format) < 0) {
  302. collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  303. goto cleanup;
  304. }
  305. }
  306. timing_step(TIMING_STEP_FREEIPMI_DSR_CACHE_DIR);
  307. // Must call otherwise only default interpretations ever used
  308. // sensor_config_file can be NULL
  309. if (ipmi_monitoring_ctx_sensor_config_file (ctx, sensor_config_file) < 0) {
  310. collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  311. goto cleanup;
  312. }
  313. timing_step(TIMING_STEP_FREEIPMI_SENSOR_CONFIG_FILE);
  314. if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx,
  315. hostname,
  316. ipmi_config,
  317. sensor_reading_flags,
  318. NULL,
  319. 0,
  320. NULL,
  321. NULL)) < 0) {
  322. collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s",
  323. ipmi_monitoring_ctx_errormsg (ctx));
  324. goto cleanup;
  325. }
  326. timing_step(TIMING_STEP_FREEIPMI_SENSOR_READINGS_BY_X);
  327. for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) {
  328. int record_id, sensor_number, sensor_type, sensor_state, sensor_units,
  329. sensor_bitmask_type, sensor_bitmask, event_reading_type_code, sensor_reading_type;
  330. char **sensor_bitmask_strings = NULL;
  331. char *sensor_name = NULL;
  332. void *sensor_reading;
  333. netdata_ipmi_get_value_int(record_id, ipmi_monitoring_sensor_read_record_id, ctx);
  334. netdata_ipmi_get_value_int(sensor_number, ipmi_monitoring_sensor_read_sensor_number, ctx);
  335. netdata_ipmi_get_value_int(sensor_type, ipmi_monitoring_sensor_read_sensor_type, ctx);
  336. netdata_ipmi_get_value_ptr(sensor_name, ipmi_monitoring_sensor_read_sensor_name, ctx);
  337. netdata_ipmi_get_value_int(sensor_state, ipmi_monitoring_sensor_read_sensor_state, ctx);
  338. netdata_ipmi_get_value_int(sensor_units, ipmi_monitoring_sensor_read_sensor_units, ctx);
  339. netdata_ipmi_get_value_int(sensor_bitmask_type, ipmi_monitoring_sensor_read_sensor_bitmask_type, ctx);
  340. netdata_ipmi_get_value_int(sensor_bitmask, ipmi_monitoring_sensor_read_sensor_bitmask, ctx);
  341. // it's ok for this to be NULL, i.e. sensor_bitmask == IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN
  342. netdata_ipmi_get_value_no_check(sensor_bitmask_strings, ipmi_monitoring_sensor_read_sensor_bitmask_strings, ctx);
  343. netdata_ipmi_get_value_int(sensor_reading_type, ipmi_monitoring_sensor_read_sensor_reading_type, ctx);
  344. // whatever we read from the sensor, it is ok
  345. netdata_ipmi_get_value_no_check(sensor_reading, ipmi_monitoring_sensor_read_sensor_reading, ctx);
  346. netdata_ipmi_get_value_int(event_reading_type_code, ipmi_monitoring_sensor_read_event_reading_type_code, ctx);
  347. netdata_update_ipmi_sensor_reading(
  348. record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type, sensor_name,
  349. sensor_reading, event_reading_type_code, sensor_bitmask_type, sensor_bitmask, sensor_bitmask_strings,
  350. state
  351. );
  352. #ifdef NETDATA_COMMENTED
  353. /* It is possible you may want to monitor specific event
  354. * conditions that may occur. If that is the case, you may want
  355. * to check out what specific bitmask type and bitmask events
  356. * occurred. See ipmi_monitoring_bitmasks.h for a list of
  357. * bitmasks and types.
  358. */
  359. if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN)
  360. printf (", %Xh", sensor_bitmask);
  361. else
  362. printf (", N/A");
  363. if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN
  364. && sensor_bitmask_strings)
  365. {
  366. unsigned int i = 0;
  367. printf (",");
  368. while (sensor_bitmask_strings[i])
  369. {
  370. printf (" ");
  371. printf ("'%s'",
  372. sensor_bitmask_strings[i]);
  373. i++;
  374. }
  375. }
  376. else
  377. printf (", N/A");
  378. printf ("\n");
  379. #endif // NETDATA_COMMENTED
  380. }
  381. rv = 0;
  382. cleanup:
  383. if (ctx)
  384. ipmi_monitoring_ctx_destroy (ctx);
  385. timing_report();
  386. if(remove_reread_sdr_after_first_use)
  387. global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE);
  388. return (rv);
  389. }
  390. static int netdata_get_ipmi_sel_events_count(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) {
  391. timing_init();
  392. ipmi_monitoring_ctx_t ctx = NULL;
  393. unsigned int sel_flags = global_sel_flags;
  394. int sel_count;
  395. int rv = -1;
  396. if (!(ctx = ipmi_monitoring_ctx_create ())) {
  397. collector_error("ipmi_monitoring_ctx_create()");
  398. goto cleanup;
  399. }
  400. if (sdr_cache_directory) {
  401. if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) {
  402. collector_error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", ipmi_monitoring_ctx_errormsg (ctx));
  403. goto cleanup;
  404. }
  405. }
  406. if (sdr_sel_cache_format) {
  407. if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sel_cache_format) < 0) {
  408. collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  409. goto cleanup;
  410. }
  411. }
  412. // Must call otherwise only default interpretations ever used
  413. // sel_config_file can be NULL
  414. if (ipmi_monitoring_ctx_sel_config_file (ctx, sel_config_file) < 0) {
  415. collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s",
  416. ipmi_monitoring_ctx_errormsg (ctx));
  417. goto cleanup;
  418. }
  419. if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx,
  420. hostname,
  421. ipmi_config,
  422. sel_flags,
  423. NULL,
  424. 0,
  425. NULL,
  426. NULL)) < 0) {
  427. collector_error( "ipmi_monitoring_sel_by_record_id(): %s",
  428. ipmi_monitoring_ctx_errormsg (ctx));
  429. goto cleanup;
  430. }
  431. netdata_update_ipmi_sel_events_count(state, sel_count);
  432. rv = 0;
  433. cleanup:
  434. if (ctx)
  435. ipmi_monitoring_ctx_destroy (ctx);
  436. timing_report();
  437. if(remove_reread_sdr_after_first_use)
  438. global_sel_flags &= ~(IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE);
  439. return (rv);
  440. }
  441. // ----------------------------------------------------------------------------
  442. // copied from freeipmi codebase commit 8dea6dec4012d0899901e595f2c868a05e1cefed
  443. // added netdata_ in-front to not overwrite library functions
  444. // FROM: common/miscutil/network.c
  /*
   * Tell whether `host` is one of the well-known spellings of the local host.
   *
   * Host names are compared case-insensitively, numeric addresses exactly.
   * `host` must not be NULL.
   *
   * Returns 1 when it is, 0 otherwise.
   */
  static int netdata_host_is_localhost (const char *host) {
      static const char *const local_names[] = {
          "localhost",
          "ipv6-localhost",
          "ip6-localhost",
      };
      static const char *const local_addresses[] = {
          "127.0.0.1",
          "::1",
          "0:0:0:0:0:0:0:1",
      };

      for (size_t idx = 0; idx < sizeof(local_names) / sizeof(local_names[0]); idx++) {
          if (strcasecmp (host, local_names[idx]) == 0)
              return 1;
      }

      for (size_t idx = 0; idx < sizeof(local_addresses) / sizeof(local_addresses[0]); idx++) {
          if (strcmp (host, local_addresses[idx]) == 0)
              return 1;
      }

      return 0;
  }
  // FROM: common/parsecommon/parse-common.h
  // Driver names accepted when parsing the driver type.

  // out-of-band (LAN) drivers
  #define IPMI_PARSE_DEVICE_LAN_STR         "lan"
  #define IPMI_PARSE_DEVICE_LAN_2_0_STR     "lan_2_0"
  #define IPMI_PARSE_DEVICE_LAN_2_0_STR2    "lan20"
  #define IPMI_PARSE_DEVICE_LAN_2_0_STR3    "lan_20"
  #define IPMI_PARSE_DEVICE_LAN_2_0_STR4    "lan2_0"
  #define IPMI_PARSE_DEVICE_LAN_2_0_STR5    "lanplus"

  // in-band drivers
  #define IPMI_PARSE_DEVICE_KCS_STR         "kcs"
  #define IPMI_PARSE_DEVICE_SSIF_STR        "ssif"
  #define IPMI_PARSE_DEVICE_OPENIPMI_STR    "openipmi"
  #define IPMI_PARSE_DEVICE_OPENIPMI_STR2   "open"
  #define IPMI_PARSE_DEVICE_SUNBMC_STR      "sunbmc"
  #define IPMI_PARSE_DEVICE_SUNBMC_STR2     "bmc"
  #define IPMI_PARSE_DEVICE_INTELDCMI_STR   "inteldcmi"
  470. // FROM: common/parsecommon/parse-common.c
  471. // changed the return values to match ipmi_monitoring.h
  472. static int netdata_parse_outofband_driver_type (const char *str) {
  473. if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0)
  474. return (IPMI_MONITORING_PROTOCOL_VERSION_1_5);
  475. /* support "lanplus" for those that might be used to ipmitool.
  476. * support typo variants to ease.
  477. */
  478. else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0
  479. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0
  480. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0
  481. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0
  482. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0)
  483. return (IPMI_MONITORING_PROTOCOL_VERSION_2_0);
  484. return (-1);
  485. }
  486. // FROM: common/parsecommon/parse-common.c
  487. // changed the return values to match ipmi_monitoring.h
  488. static int netdata_parse_inband_driver_type (const char *str) {
  489. if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0)
  490. return (IPMI_MONITORING_DRIVER_TYPE_KCS);
  491. else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0)
  492. return (IPMI_MONITORING_DRIVER_TYPE_SSIF);
  493. /* support "open" for those that might be used to
  494. * ipmitool.
  495. */
  496. else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0
  497. || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0)
  498. return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI);
  499. /* support "bmc" for those that might be used to
  500. * ipmitool.
  501. */
  502. else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0
  503. || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0)
  504. return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC);
  505. #ifdef IPMI_MONITORING_DRIVER_TYPE_INTELDCMI
  506. else if (strcasecmp (str, IPMI_PARSE_DEVICE_INTELDCMI_STR) == 0)
  507. return (IPMI_MONITORING_DRIVER_TYPE_INTELDCMI);
  508. #endif // IPMI_MONITORING_DRIVER_TYPE_INTELDCMI
  509. return (-1);
  510. }
  511. // ----------------------------------------------------------------------------
  512. // BEGIN NETDATA CODE
  // Bit flags naming the two kinds of data handled in this file
  // (sensor readings and SEL events); the values can be OR-ed together.
  typedef enum __attribute__((packed)) {
      IPMI_COLLECT_TYPE_SENSORS = 0x01, // bit 0
      IPMI_COLLECT_TYPE_SEL     = 0x02, // bit 1
  } IPMI_COLLECTION_TYPE;
  517. struct sensor {
  518. int sensor_type;
  519. int sensor_state;
  520. int sensor_units;
  521. char *sensor_name;
  522. int sensor_reading_type;
  523. union {
  524. uint8_t bool_value;
  525. uint32_t uint32_value;
  526. double double_value;
  527. } sensor_reading;
  528. // netdata provided
  529. const char *context;
  530. const char *title;
  531. const char *units;
  532. const char *family;
  533. const char *chart_type;
  534. const char *dimension;
  535. int priority;
  536. const char *type;
  537. const char *component;
  538. int multiplier;
  539. bool do_metric;
  540. bool do_state;
  541. bool metric_chart_sent;
  542. bool state_chart_sent;
  543. usec_t last_collected_metric_ut;
  544. usec_t last_collected_state_ut;
  545. };
  // Status of a collector (kept separately for sensors and for SEL
  // in struct netdata_ipmi_state).
  typedef enum __attribute__((packed)) {
      ICS_INIT = 0,       // the zero value
      ICS_INIT_FAILED,
      ICS_RUNNING,
      ICS_FAILED,
  } IPMI_COLLECTOR_STATUS;
  552. struct netdata_ipmi_state {
  553. bool debug;
  554. struct {
  555. IPMI_COLLECTOR_STATUS status;
  556. usec_t last_iteration_ut;
  557. size_t collected;
  558. usec_t now_ut;
  559. usec_t freq_ut;
  560. int priority;
  561. DICTIONARY *dict;
  562. } sensors;
  563. struct {
  564. IPMI_COLLECTOR_STATUS status;
  565. usec_t last_iteration_ut;
  566. size_t events;
  567. usec_t now_ut;
  568. usec_t freq_ut;
  569. int priority;
  570. } sel;
  571. struct {
  572. usec_t now_ut;
  573. } updates;
  574. };
  575. // ----------------------------------------------------------------------------
  576. // excluded record ids maintenance (both for sensor data and state)
  577. static int *excluded_record_ids = NULL;
  578. size_t excluded_record_ids_length = 0;
  579. static void excluded_record_ids_parse(const char *s, bool debug) {
  580. if(!s) return;
  581. while(*s) {
  582. while(*s && !isdigit(*s)) s++;
  583. if(isdigit(*s)) {
  584. char *e;
  585. unsigned long n = strtoul(s, &e, 10);
  586. s = e;
  587. if(n != 0) {
  588. excluded_record_ids = reallocz(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int));
  589. excluded_record_ids[excluded_record_ids_length++] = (int)n;
  590. }
  591. }
  592. }
  593. if(debug) {
  594. fprintf(stderr, "%s: excluded record ids:", program_name);
  595. size_t i;
  596. for(i = 0; i < excluded_record_ids_length; i++) {
  597. fprintf(stderr, " %d", excluded_record_ids[i]);
  598. }
  599. fprintf(stderr, "\n");
  600. }
  601. }
  602. static int *excluded_status_record_ids = NULL;
  603. size_t excluded_status_record_ids_length = 0;
  604. static void excluded_status_record_ids_parse(const char *s, bool debug) {
  605. if(!s) return;
  606. while(*s) {
  607. while(*s && !isdigit(*s)) s++;
  608. if(isdigit(*s)) {
  609. char *e;
  610. unsigned long n = strtoul(s, &e, 10);
  611. s = e;
  612. if(n != 0) {
  613. excluded_status_record_ids = reallocz(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int));
  614. excluded_status_record_ids[excluded_status_record_ids_length++] = (int)n;
  615. }
  616. }
  617. }
  618. if(debug) {
  619. fprintf(stderr, "%s: excluded status record ids:", program_name);
  620. size_t i;
  621. for(i = 0; i < excluded_status_record_ids_length; i++) {
  622. fprintf(stderr, " %d", excluded_status_record_ids[i]);
  623. }
  624. fprintf(stderr, "\n");
  625. }
  626. }
  627. static int excluded_record_ids_check(int record_id) {
  628. size_t i;
  629. for(i = 0; i < excluded_record_ids_length; i++) {
  630. if(excluded_record_ids[i] == record_id)
  631. return 1;
  632. }
  633. return 0;
  634. }
  635. static int excluded_status_record_ids_check(int record_id) {
  636. size_t i;
  637. for(i = 0; i < excluded_status_record_ids_length; i++) {
  638. if(excluded_status_record_ids[i] == record_id)
  639. return 1;
  640. }
  641. return 0;
  642. }
  643. // ----------------------------------------------------------------------------
  644. // data collection functions
  645. struct {
  646. const char *search;
  647. SIMPLE_PATTERN *pattern;
  648. const char *label;
  649. } sensors_component_patterns[] = {
  650. // The order is important!
  651. // They are evaluated top to bottom
  652. // The first the matches is used
  653. {
  654. .search = "*DIMM*|*_DIM*|*VTT*|*VDDQ*|*ECC*|*MEM*CRC*|*MEM*BD*",
  655. .label = NETDATA_SENSOR_COMPONENT_MEMORY_MODULE,
  656. },
  657. {
  658. .search = "*CPU*|SOC_*|*VDDCR*|P*_VDD*|*_DTS|*VCORE*|*PROC*",
  659. .label = NETDATA_SENSOR_COMPONENT_PROCESSOR,
  660. },
  661. {
  662. .search = "IPU*",
  663. .label = NETDATA_SENSOR_COMPONENT_IPU,
  664. },
  665. {
  666. .search = "M2_*|*SSD*|*HSC*|*HDD*|*NVME*",
  667. .label = NETDATA_SENSOR_COMPONENT_STORAGE,
  668. },
  669. {
  670. .search = "MB_*|*PCH*|*VBAT*|*I/O*BD*|*IO*BD*",
  671. .label = NETDATA_SENSOR_COMPONENT_MOTHERBOARD,
  672. },
  673. {
  674. .search = "Watchdog|SEL|SYS_*|*CHASSIS*",
  675. .label = NETDATA_SENSOR_COMPONENT_SYSTEM,
  676. },
  677. {
  678. .search = "PS*|P_*|*PSU*|*PWR*|*TERMV*|*D2D*",
  679. .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY,
  680. },
  681. // fallback components
  682. {
  683. .search = "VR_P*|*VRMP*",
  684. .label = NETDATA_SENSOR_COMPONENT_PROCESSOR,
  685. },
  686. {
  687. .search = "*VSB*|*PS*",
  688. .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY,
  689. },
  690. {
  691. .search = "*MEM*|*MEM*RAID*",
  692. .label = NETDATA_SENSOR_COMPONENT_MEMORY,
  693. },
  694. {
  695. .search = "*RAID*", // there is also "Memory RAID", so keep this after memory
  696. .label = NETDATA_SENSOR_COMPONENT_STORAGE,
  697. },
  698. {
  699. .search = "*PERIPHERAL*|*USB*",
  700. .label = NETDATA_SENSOR_COMPONENT_PERIPHERAL,
  701. },
  702. {
  703. .search = "*FAN*|*12V*|*VCC*|*PCI*|*CHIPSET*|*AMP*|*BD*",
  704. .label = NETDATA_SENSOR_COMPONENT_SYSTEM,
  705. },
  706. // terminator
  707. {
  708. .search = NULL,
  709. .label = NULL,
  710. }
  711. };
  712. static const char *netdata_sensor_name_to_component(const char *sensor_name) {
  713. for(int i = 0; sensors_component_patterns[i].search ;i++) {
  714. if(!sensors_component_patterns[i].pattern)
  715. sensors_component_patterns[i].pattern = simple_pattern_create(sensors_component_patterns[i].search, "|", SIMPLE_PATTERN_EXACT, false);
  716. if(simple_pattern_matches(sensors_component_patterns[i].pattern, sensor_name))
  717. return sensors_component_patterns[i].label;
  718. }
  719. return "Other";
  720. }
  721. const char *netdata_collect_type_to_string(IPMI_COLLECTION_TYPE type) {
  722. if((type & (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL)) == (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL))
  723. return "sensors,sel";
  724. if(type & IPMI_COLLECT_TYPE_SEL)
  725. return "sel";
  726. if(type & IPMI_COLLECT_TYPE_SENSORS)
  727. return "sensors";
  728. return "unknown";
  729. }
  730. static void netdata_sensor_set_value(struct sensor *sn, void *sensor_reading, struct netdata_ipmi_state *state __maybe_unused) {
  731. switch(sn->sensor_reading_type) {
  732. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
  733. sn->sensor_reading.bool_value = *((uint8_t *)sensor_reading);
  734. break;
  735. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
  736. sn->sensor_reading.uint32_value = *((uint32_t *)sensor_reading);
  737. break;
  738. case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
  739. sn->sensor_reading.double_value = *((double *)sensor_reading);
  740. break;
  741. default:
  742. case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN:
  743. sn->do_metric = false;
  744. break;
  745. }
  746. }
  747. static void netdata_update_ipmi_sensor_reading(
  748. int record_id
  749. , int sensor_number
  750. , int sensor_type
  751. , int sensor_state
  752. , int sensor_units
  753. , int sensor_reading_type
  754. , char *sensor_name
  755. , void *sensor_reading
  756. , int event_reading_type_code __maybe_unused
  757. , int sensor_bitmask_type __maybe_unused
  758. , int sensor_bitmask __maybe_unused
  759. , char **sensor_bitmask_strings __maybe_unused
  760. , struct netdata_ipmi_state *state
  761. ) {
  762. if(unlikely(sensor_state == IPMI_MONITORING_STATE_UNKNOWN &&
  763. sensor_type == IPMI_MONITORING_SENSOR_TYPE_UNKNOWN &&
  764. sensor_units == IPMI_MONITORING_SENSOR_UNITS_UNKNOWN &&
  765. sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN &&
  766. (!sensor_name || !*sensor_name)))
  767. // we can't do anything about this sensor - everything is unknown
  768. return;
  769. if(unlikely(!sensor_name || !*sensor_name))
  770. sensor_name = "UNNAMED";
  771. state->sensors.collected++;
  772. char key[SENSORS_DICT_KEY_SIZE + 1];
  773. snprintfz(key, SENSORS_DICT_KEY_SIZE, "i%d_n%d_t%d_u%d_%s",
  774. record_id, sensor_number, sensor_reading_type, sensor_units, sensor_name);
  775. // find the sensor record
  776. const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(state->sensors.dict, key);
  777. if(likely(item)) {
  778. // recurring collection
  779. if(state->debug)
  780. fprintf(stderr, "%s: reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n",
  781. program_name, sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type);
  782. struct sensor *sn = dictionary_acquired_item_value(item);
  783. if(sensor_reading) {
  784. netdata_sensor_set_value(sn, sensor_reading, state);
  785. sn->last_collected_metric_ut = state->sensors.now_ut;
  786. }
  787. sn->sensor_state = sensor_state;
  788. sn->last_collected_state_ut = state->sensors.now_ut;
  789. dictionary_acquired_item_release(state->sensors.dict, item);
  790. return;
  791. }
  792. if(state->debug)
  793. fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n",
  794. sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type);
  795. // check if it is excluded
  796. bool excluded_metric = excluded_record_ids_check(record_id);
  797. bool excluded_state = excluded_status_record_ids_check(record_id);
  798. if(excluded_metric) {
  799. if(state->debug)
  800. fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name);
  801. }
  802. if(excluded_state) {
  803. if(state->debug)
  804. fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name);
  805. }
  806. struct sensor t = {
  807. .sensor_type = sensor_type,
  808. .sensor_state = sensor_state,
  809. .sensor_units = sensor_units,
  810. .sensor_reading_type = sensor_reading_type,
  811. .sensor_name = strdupz(sensor_name),
  812. .component = netdata_sensor_name_to_component(sensor_name),
  813. .do_state = !excluded_state,
  814. .do_metric = !excluded_metric,
  815. };
  816. t.type = netdata_ipmi_get_sensor_type_string(t.sensor_type, &t.component);
  817. switch(t.sensor_units) {
  818. case IPMI_MONITORING_SENSOR_UNITS_CELSIUS:
  819. t.dimension = "temperature";
  820. t.context = "ipmi.sensor_temperature_c";
  821. t.title = "IPMI Sensor Temperature Celsius";
  822. t.units = "Celsius";
  823. t.family = "temperatures";
  824. t.chart_type = "line";
  825. t.priority = state->sensors.priority + 10;
  826. break;
  827. case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT:
  828. t.dimension = "temperature";
  829. t.context = "ipmi.sensor_temperature_f";
  830. t.title = "IPMI Sensor Temperature Fahrenheit";
  831. t.units = "Fahrenheit";
  832. t.family = "temperatures";
  833. t.chart_type = "line";
  834. t.priority = state->sensors.priority + 20;
  835. break;
  836. case IPMI_MONITORING_SENSOR_UNITS_VOLTS:
  837. t.dimension = "voltage";
  838. t.context = "ipmi.sensor_voltage";
  839. t.title = "IPMI Sensor Voltage";
  840. t.units = "Volts";
  841. t.family = "voltages";
  842. t.chart_type = "line";
  843. t.priority = state->sensors.priority + 30;
  844. break;
  845. case IPMI_MONITORING_SENSOR_UNITS_AMPS:
  846. t.dimension = "ampere";
  847. t.context = "ipmi.sensor_ampere";
  848. t.title = "IPMI Sensor Current";
  849. t.units = "Amps";
  850. t.family = "current";
  851. t.chart_type = "line";
  852. t.priority = state->sensors.priority + 40;
  853. break;
  854. case IPMI_MONITORING_SENSOR_UNITS_RPM:
  855. t.dimension = "rotations";
  856. t.context = "ipmi.sensor_fan_speed";
  857. t.title = "IPMI Sensor Fans Speed";
  858. t.units = "RPM";
  859. t.family = "fans";
  860. t.chart_type = "line";
  861. t.priority = state->sensors.priority + 50;
  862. break;
  863. case IPMI_MONITORING_SENSOR_UNITS_WATTS:
  864. t.dimension = "power";
  865. t.context = "ipmi.sensor_power";
  866. t.title = "IPMI Sensor Power";
  867. t.units = "Watts";
  868. t.family = "power";
  869. t.chart_type = "line";
  870. t.priority = state->sensors.priority + 60;
  871. break;
  872. case IPMI_MONITORING_SENSOR_UNITS_PERCENT:
  873. t.dimension = "percentage";
  874. t.context = "ipmi.sensor_reading_percent";
  875. t.title = "IPMI Sensor Reading Percentage";
  876. t.units = "%%";
  877. t.family = "other";
  878. t.chart_type = "line";
  879. t.priority = state->sensors.priority + 70;
  880. break;
  881. default:
  882. t.priority = state->sensors.priority + 80;
  883. t.do_metric = false;
  884. break;
  885. }
  886. switch(sensor_reading_type) {
  887. case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
  888. t.multiplier = 1000;
  889. break;
  890. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
  891. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
  892. t.multiplier = 1;
  893. break;
  894. default:
  895. t.do_metric = false;
  896. break;
  897. }
  898. if(sensor_reading) {
  899. netdata_sensor_set_value(&t, sensor_reading, state);
  900. t.last_collected_metric_ut = state->sensors.now_ut;
  901. }
  902. t.last_collected_state_ut = state->sensors.now_ut;
  903. dictionary_set(state->sensors.dict, key, &t, sizeof(t));
  904. }
  905. static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events) {
  906. state->sel.events = events;
  907. }
  908. int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) {
  909. errno = 0;
  910. if(type & IPMI_COLLECT_TYPE_SENSORS) {
  911. state->sensors.collected = 0;
  912. state->sensors.now_ut = now_monotonic_usec();
  913. if (netdata_read_ipmi_sensors(ipmi_config, state) < 0) return -1;
  914. }
  915. if(type & IPMI_COLLECT_TYPE_SEL) {
  916. state->sel.events = 0;
  917. state->sel.now_ut = now_monotonic_usec();
  918. if(netdata_get_ipmi_sel_events_count(ipmi_config, state) < 0) return -2;
  919. }
  920. return 0;
  921. }
  922. int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) {
  923. int i, checks = SPEED_TEST_ITERATIONS, successful = 0;
  924. usec_t total = 0;
  925. for(i = 0 ; i < checks ; i++) {
  926. if(unlikely(state->debug))
  927. fprintf(stderr, "%s: checking %s data collection speed iteration %d of %d\n",
  928. program_name, netdata_collect_type_to_string(type), i + 1, checks);
  929. // measure the time a data collection needs
  930. usec_t start = now_realtime_usec();
  931. if(netdata_ipmi_collect_data(ipmi_config, type, state) < 0)
  932. continue;
  933. usec_t end = now_realtime_usec();
  934. successful++;
  935. if(unlikely(state->debug))
  936. fprintf(stderr, "%s: %s data collection speed was %llu usec\n",
  937. program_name, netdata_collect_type_to_string(type), end - start);
  938. // add it to our total
  939. total += end - start;
  940. // wait the same time
  941. // to avoid flooding the IPMI processor with requests
  942. sleep_usec(end - start);
  943. }
  944. if(!successful)
  945. return 0;
  946. // so, we assume it needed 2x the time
  947. // we find the average in microseconds
  948. // and we round-up to the closest second
  949. return (int)(( total * 2 / successful / USEC_PER_SEC ) + 1);
  950. }
  951. // ----------------------------------------------------------------------------
  952. // data collection threads
  953. struct ipmi_collection_thread {
  954. struct ipmi_monitoring_ipmi_config ipmi_config;
  955. int freq_s;
  956. bool debug;
  957. IPMI_COLLECTION_TYPE type;
  958. SPINLOCK spinlock;
  959. struct netdata_ipmi_state state;
  960. };
  961. void *netdata_ipmi_collection_thread(void *ptr) {
  962. struct ipmi_collection_thread *t = ptr;
  963. if(t->debug) fprintf(stderr, "%s: calling initialize_ipmi_config() for %s\n",
  964. program_name, netdata_collect_type_to_string(t->type));
  965. initialize_ipmi_config(&t->ipmi_config);
  966. if(t->debug) fprintf(stderr, "%s: detecting IPMI minimum update frequency for %s...\n",
  967. program_name, netdata_collect_type_to_string(t->type));
  968. int freq_s = netdata_ipmi_detect_speed_secs(&t->ipmi_config, t->type, &t->state);
  969. if(!freq_s) {
  970. if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
  971. t->state.sensors.status = ICS_INIT_FAILED;
  972. t->state.sensors.last_iteration_ut = 0;
  973. }
  974. if(t->type & IPMI_COLLECT_TYPE_SEL) {
  975. t->state.sel.status = ICS_INIT_FAILED;
  976. t->state.sel.last_iteration_ut = 0;
  977. }
  978. return ptr;
  979. }
  980. else {
  981. if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
  982. t->state.sensors.status = ICS_RUNNING;
  983. }
  984. if(t->type & IPMI_COLLECT_TYPE_SEL) {
  985. t->state.sel.status = ICS_RUNNING;
  986. }
  987. }
  988. t->freq_s = freq_s = MAX(t->freq_s, freq_s);
  989. if(t->debug) {
  990. fprintf(stderr, "%s: IPMI minimum update frequency of %s was calculated to %d seconds.\n",
  991. program_name, netdata_collect_type_to_string(t->type), t->freq_s);
  992. fprintf(stderr, "%s: starting data collection of %s\n",
  993. program_name, netdata_collect_type_to_string(t->type));
  994. }
  995. size_t iteration = 0, failures = 0;
  996. usec_t step = t->freq_s * USEC_PER_SEC;
  997. heartbeat_t hb;
  998. heartbeat_init(&hb);
  999. while(++iteration) {
  1000. heartbeat_next(&hb, step);
  1001. if(t->debug)
  1002. fprintf(stderr, "%s: calling netdata_ipmi_collect_data() for %s\n",
  1003. program_name, netdata_collect_type_to_string(t->type));
  1004. struct netdata_ipmi_state tmp_state = t->state;
  1005. if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
  1006. tmp_state.sensors.last_iteration_ut = now_monotonic_usec();
  1007. tmp_state.sensors.freq_ut = t->freq_s * USEC_PER_SEC;
  1008. }
  1009. if(t->type & IPMI_COLLECT_TYPE_SEL) {
  1010. tmp_state.sel.last_iteration_ut = now_monotonic_usec();
  1011. tmp_state.sel.freq_ut = t->freq_s * USEC_PER_SEC;
  1012. }
  1013. if(netdata_ipmi_collect_data(&t->ipmi_config, t->type, &tmp_state) != 0)
  1014. failures++;
  1015. else
  1016. failures = 0;
  1017. if(failures > 10) {
  1018. collector_error("%s() failed to collect %s data for %zu consecutive times, having made %zu iterations.",
  1019. __FUNCTION__, netdata_collect_type_to_string(t->type), failures, iteration);
  1020. if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
  1021. t->state.sensors.status = ICS_FAILED;
  1022. t->state.sensors.last_iteration_ut = 0;
  1023. }
  1024. if(t->type & IPMI_COLLECT_TYPE_SEL) {
  1025. t->state.sel.status = ICS_FAILED;
  1026. t->state.sel.last_iteration_ut = 0;
  1027. }
  1028. break;
  1029. }
  1030. spinlock_lock(&t->spinlock);
  1031. t->state = tmp_state;
  1032. spinlock_unlock(&t->spinlock);
  1033. }
  1034. return ptr;
  1035. }
  1036. // ----------------------------------------------------------------------------
  1037. // sending data to netdata
  1038. static inline bool is_sensor_updated(usec_t last_collected_ut, usec_t now_ut, usec_t freq) {
  1039. return (now_ut - last_collected_ut < freq * 2) ? true : false;
  1040. }
  1041. static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *state) {
  1042. if(state->sensors.status != ICS_RUNNING) {
  1043. if(unlikely(state->debug))
  1044. fprintf(stderr, "%s: %s() sensors state is not RUNNING\n",
  1045. program_name, __FUNCTION__ );
  1046. return 0;
  1047. }
  1048. size_t total_sensors_sent = 0;
  1049. int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC);
  1050. struct sensor *sn;
  1051. // generate the CHART/DIMENSION lines, if we have to
  1052. dfe_start_reentrant(state->sensors.dict, sn) {
  1053. if(unlikely(!sn->do_metric && !sn->do_state))
  1054. continue;
  1055. bool did_metric = false, did_state = false;
  1056. if(likely(sn->do_metric)) {
  1057. if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut))) {
  1058. if(unlikely(state->debug))
  1059. fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %llu, now %llu, freq %llu\n",
  1060. program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut);
  1061. }
  1062. else {
  1063. if (unlikely(!sn->metric_chart_sent)) {
  1064. sn->metric_chart_sent = true;
  1065. printf("CHART '%s_%s' '' '%s' '%s' '%s' '%s' '%s' %d %d '' '%s' '%s'\n",
  1066. sn->context, sn_dfe.name, sn->title, sn->units, sn->family, sn->context,
  1067. sn->chart_type, sn->priority + 1, update_every, program_name, "sensors");
  1068. printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name);
  1069. printf("CLABEL 'type' '%s' 1\n", sn->type);
  1070. printf("CLABEL 'component' '%s' 1\n", sn->component);
  1071. printf("CLABEL_COMMIT\n");
  1072. printf("DIMENSION '%s' '' absolute 1 %d\n", sn->dimension, sn->multiplier);
  1073. }
  1074. printf("BEGIN '%s_%s'\n", sn->context, sn_dfe.name);
  1075. switch (sn->sensor_reading_type) {
  1076. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
  1077. printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.uint32_value
  1078. );
  1079. break;
  1080. case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
  1081. printf("SET '%s' = %lld\n", sn->dimension,
  1082. (long long int) (sn->sensor_reading.double_value * sn->multiplier)
  1083. );
  1084. break;
  1085. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
  1086. printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.bool_value
  1087. );
  1088. break;
  1089. default:
  1090. case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN:
  1091. // this should never happen because we also do the same check at netdata_get_sensor()
  1092. sn->do_metric = false;
  1093. break;
  1094. }
  1095. printf("END\n");
  1096. did_metric = true;
  1097. }
  1098. }
  1099. if(likely(sn->do_state)) {
  1100. if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut))) {
  1101. if (unlikely(state->debug))
  1102. fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %llu, now %llu, freq %llu\n",
  1103. program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut);
  1104. }
  1105. else {
  1106. if (unlikely(!sn->state_chart_sent)) {
  1107. sn->state_chart_sent = true;
  1108. printf("CHART 'ipmi.sensor_state_%s' '' 'IPMI Sensor State' 'state' 'states' 'ipmi.sensor_state' 'line' %d %d '' '%s' '%s'\n",
  1109. sn_dfe.name, sn->priority, update_every, program_name, "sensors");
  1110. printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name);
  1111. printf("CLABEL 'type' '%s' 1\n", sn->type);
  1112. printf("CLABEL 'component' '%s' 1\n", sn->component);
  1113. printf("CLABEL_COMMIT\n");
  1114. printf("DIMENSION 'nominal' '' absolute 1 1\n");
  1115. printf("DIMENSION 'warning' '' absolute 1 1\n");
  1116. printf("DIMENSION 'critical' '' absolute 1 1\n");
  1117. printf("DIMENSION 'unknown' '' absolute 1 1\n");
  1118. }
  1119. printf("BEGIN 'ipmi.sensor_state_%s'\n", sn_dfe.name);
  1120. printf("SET 'nominal' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_NOMINAL ? 1LL : 0LL);
  1121. printf("SET 'warning' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_WARNING ? 1LL : 0LL);
  1122. printf("SET 'critical' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_CRITICAL ? 1LL : 0LL);
  1123. printf("SET 'unknown' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_UNKNOWN ? 1LL : 0LL);
  1124. printf("END\n");
  1125. did_state = true;
  1126. }
  1127. }
  1128. if(likely(did_metric || did_state))
  1129. total_sensors_sent++;
  1130. }
  1131. dfe_done(sn);
  1132. return total_sensors_sent;
  1133. }
  1134. static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) {
  1135. static bool sel_chart_generated = false;
  1136. if(likely(state->sel.status == ICS_RUNNING)) {
  1137. if(unlikely(!sel_chart_generated)) {
  1138. sel_chart_generated = true;
  1139. printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d '' '%s' '%s'\n"
  1140. , state->sel.priority + 2
  1141. , (int)(state->sel.freq_ut / USEC_PER_SEC)
  1142. , program_name
  1143. , "sel"
  1144. );
  1145. printf("DIMENSION events '' absolute 1 1\n");
  1146. }
  1147. printf(
  1148. "BEGIN ipmi.events\n"
  1149. "SET events = %zu\n"
  1150. "END\n"
  1151. , state->sel.events
  1152. );
  1153. }
  1154. return state->sel.events;
  1155. }
  1156. // ----------------------------------------------------------------------------
  1157. // main, command line arguments parsing
  1158. int main (int argc, char **argv) {
  1159. bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT;
  1160. stderror = stderr;
  1161. clocks_init();
  1162. int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency
  1163. int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events
  1164. bool debug = false;
  1165. // ------------------------------------------------------------------------
  1166. // initialization of netdata plugin
  1167. program_name = "freeipmi.plugin";
  1168. // disable syslog
  1169. error_log_syslog = 0;
  1170. // set errors flood protection to 100 logs per hour
  1171. error_log_errors_per_period = 100;
  1172. error_log_throttle_period = 3600;
  1173. // initialize the threads
  1174. netdata_threads_init_for_external_plugins(0); // set the default threads stack size here
  1175. // ------------------------------------------------------------------------
  1176. // parse command line parameters
  1177. int i, freq_s = 0;
  1178. for(i = 1; i < argc ; i++) {
  1179. if(isdigit(*argv[i]) && !freq_s) {
  1180. int n = str2i(argv[i]);
  1181. if(n > 0 && n < 86400) {
  1182. freq_s = n;
  1183. continue;
  1184. }
  1185. }
  1186. else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) {
  1187. printf("%s %s\n", program_name, VERSION);
  1188. exit(0);
  1189. }
  1190. else if(strcmp("debug", argv[i]) == 0) {
  1191. debug = true;
  1192. continue;
  1193. }
  1194. else if(strcmp("sel", argv[i]) == 0) {
  1195. netdata_do_sel = true;
  1196. continue;
  1197. }
  1198. else if(strcmp("no-sel", argv[i]) == 0) {
  1199. netdata_do_sel = false;
  1200. continue;
  1201. }
  1202. else if(strcmp("reread-sdr-cache", argv[i]) == 0) {
  1203. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE;
  1204. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE;
  1205. remove_reread_sdr_after_first_use = false;
  1206. if (debug) fprintf(stderr, "%s: reread-sdr-cache enabled for both sensors and SEL\n", program_name);
  1207. }
  1208. else if(strcmp("interpret-oem-data", argv[i]) == 0) {
  1209. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA;
  1210. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA;
  1211. if (debug) fprintf(stderr, "%s: interpret-oem-data enabled for both sensors and SEL\n", program_name);
  1212. }
  1213. else if(strcmp("assume-system-event-record", argv[i]) == 0) {
  1214. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD;
  1215. if (debug) fprintf(stderr, "%s: assume-system-event-record enabled\n", program_name);
  1216. }
  1217. else if(strcmp("ignore-non-interpretable-sensors", argv[i]) == 0) {
  1218. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS;
  1219. if (debug) fprintf(stderr, "%s: ignore-non-interpretable-sensors enabled\n", program_name);
  1220. }
  1221. else if(strcmp("bridge-sensors", argv[i]) == 0) {
  1222. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS;
  1223. if (debug) fprintf(stderr, "%s: bridge-sensors enabled\n", program_name);
  1224. }
  1225. else if(strcmp("shared-sensors", argv[i]) == 0) {
  1226. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS;
  1227. if (debug) fprintf(stderr, "%s: shared-sensors enabled\n", program_name);
  1228. }
  1229. else if(strcmp("no-discrete-reading", argv[i]) == 0) {
  1230. global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING);
  1231. if (debug) fprintf(stderr, "%s: discrete-reading disabled\n", program_name);
  1232. }
  1233. else if(strcmp("ignore-scanning-disabled", argv[i]) == 0) {
  1234. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED;
  1235. if (debug) fprintf(stderr, "%s: ignore-scanning-disabled enabled\n", program_name);
  1236. }
  1237. else if(strcmp("assume-bmc-owner", argv[i]) == 0) {
  1238. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER;
  1239. if (debug) fprintf(stderr, "%s: assume-bmc-owner enabled\n", program_name);
  1240. }
  1241. #if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES)
  1242. else if(strcmp("entity-sensor-names", argv[i]) == 0) {
  1243. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES;
  1244. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES;
  1245. if (debug) fprintf(stderr, "%s: entity-sensor-names enabled for both sensors and SEL\n", program_name);
  1246. }
  1247. #endif
  1248. else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) {
  1249. fprintf(stderr,
  1250. "\n"
  1251. " netdata %s %s\n"
  1252. " Copyright (C) 2023 Netdata Inc.\n"
  1253. " Released under GNU General Public License v3 or later.\n"
  1254. " All rights reserved.\n"
  1255. "\n"
  1256. " This program is a data collector plugin for netdata.\n"
  1257. "\n"
  1258. " Available command line options:\n"
  1259. "\n"
  1260. " SECONDS data collection frequency\n"
  1261. " minimum: %d\n"
  1262. "\n"
  1263. " debug enable verbose output\n"
  1264. " default: disabled\n"
  1265. "\n"
  1266. " sel\n"
  1267. " no-sel enable/disable SEL collection\n"
  1268. " default: %s\n"
  1269. "\n"
  1270. " reread-sdr-cache re-read SDR cache on every iteration\n"
  1271. " default: disabled\n"
  1272. "\n"
  1273. " interpret-oem-data attempt to parse OEM data\n"
  1274. " default: disabled\n"
  1275. "\n"
  1276. " assume-system-event-record \n"
  1277. " tread illegal SEL events records as normal\n"
  1278. " default: disabled\n"
  1279. "\n"
  1280. " ignore-non-interpretable-sensors \n"
  1281. " do not read sensors that cannot be interpreted\n"
  1282. " default: disabled\n"
  1283. "\n"
  1284. " bridge-sensors bridge sensors not owned by the BMC\n"
  1285. " default: disabled\n"
  1286. "\n"
  1287. " shared-sensors enable shared sensors, if found\n"
  1288. " default: disabled\n"
  1289. "\n"
  1290. " no-discrete-reading do not read sensors that their event/reading type code is invalid\n"
  1291. " default: enabled\n"
  1292. "\n"
  1293. " ignore-scanning-disabled \n"
  1294. " Ignore the scanning bit and read sensors no matter what\n"
  1295. " default: disabled\n"
  1296. "\n"
  1297. " assume-bmc-owner assume the BMC is the sensor owner no matter what\n"
  1298. " (usually bridging is required too)\n"
  1299. " default: disabled\n"
  1300. "\n"
  1301. #if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES)
  1302. " entity-sensor-names sensor names prefixed with entity id and instance\n"
  1303. " default: disabled\n"
  1304. "\n"
  1305. #endif
  1306. " hostname HOST\n"
  1307. " username USER\n"
  1308. " password PASS connect to remote IPMI host\n"
  1309. " default: local IPMI processor\n"
  1310. "\n"
  1311. " no-auth-code-check\n"
  1312. " noauthcodecheck don't check the authentication codes returned\n"
  1313. "\n"
  1314. " driver-type IPMIDRIVER\n"
  1315. " Specify the driver type to use instead of doing an auto selection. \n"
  1316. " The currently available outofband drivers are LAN and LAN_2_0,\n"
  1317. " which perform IPMI 1.5 and IPMI 2.0 respectively. \n"
  1318. " The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.\n"
  1319. "\n"
  1320. " sdr-cache-dir PATH directory for SDR cache files\n"
  1321. " default: %s\n"
  1322. "\n"
  1323. " sensor-config-file FILE filename to read sensor configuration\n"
  1324. " default: %s\n"
  1325. "\n"
  1326. " sel-config-file FILE filename to read sel configuration\n"
  1327. " default: %s\n"
  1328. "\n"
  1329. " ignore N1,N2,N3,... sensor IDs to ignore\n"
  1330. " default: none\n"
  1331. "\n"
  1332. " ignore-status N1,N2,N3,... sensor IDs to ignore status (nominal/warning/critical)\n"
  1333. " default: none\n"
  1334. "\n"
  1335. " -v\n"
  1336. " -V\n"
  1337. " version print version and exit\n"
  1338. "\n"
  1339. " Linux kernel module for IPMI is CPU hungry.\n"
  1340. " On Linux run this to lower kipmiN CPU utilization:\n"
  1341. " # echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us\n"
  1342. "\n"
  1343. " or create: /etc/modprobe.d/ipmi.conf with these contents:\n"
  1344. " options ipmi_si kipmid_max_busy_us=10\n"
  1345. "\n"
  1346. " For more information:\n"
  1347. " https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin\n"
  1348. "\n"
  1349. , program_name, VERSION
  1350. , update_every
  1351. , netdata_do_sel?"enabled":"disabled"
  1352. , sdr_cache_directory?sdr_cache_directory:"system default"
  1353. , sensor_config_file?sensor_config_file:"system default"
  1354. , sel_config_file?sel_config_file:"system default"
  1355. );
  1356. exit(1);
  1357. }
  1358. else if(i < argc && strcmp("hostname", argv[i]) == 0) {
  1359. hostname = strdupz(argv[++i]);
  1360. char *s = argv[i];
  1361. // overwrite the argv copy so the hostname is hidden from the process list
  1362. while(*s) *s++ = 'x';
  1363. if(debug) fprintf(stderr, "%s: hostname set to '%s'\n", program_name, hostname);
  1364. continue;
  1365. }
  1366. else if(i < argc && strcmp("username", argv[i]) == 0) {
  1367. username = strdupz(argv[++i]);
  1368. char *s = argv[i];
  1369. // overwrite the argv copy so the username is hidden from the process list
  1370. while(*s) *s++ = 'x';
  1371. if(debug) fprintf(stderr, "%s: username set to '%s'\n", program_name, username);
  1372. continue;
  1373. }
  1374. else if(i < argc && strcmp("password", argv[i]) == 0) {
  1375. password = strdupz(argv[++i]);
  1376. char *s = argv[i];
  1377. // overwrite the argv copy so the password is hidden from the process list
  1378. while(*s) *s++ = 'x';
  1379. if(debug) fprintf(stderr, "%s: password set to '%s'\n", program_name, password);
  1380. continue;
  1381. }
  1382. else if(strcmp("driver-type", argv[i]) == 0) {
  1383. if (hostname) {
  1384. protocol_version = netdata_parse_outofband_driver_type(argv[++i]);
  1385. if(debug) fprintf(stderr, "%s: outband protocol version set to '%d'\n",
  1386. program_name, protocol_version);
  1387. }
  1388. else {
  1389. driver_type = netdata_parse_inband_driver_type(argv[++i]);
  1390. if(debug) fprintf(stderr, "%s: inband driver type set to '%d'\n",
  1391. program_name, driver_type);
  1392. }
  1393. continue;
  1394. } else if (i < argc && (strcmp("noauthcodecheck", argv[i]) == 0 || strcmp("no-auth-code-check", argv[i]) == 0)) {
  1395. if (!hostname || netdata_host_is_localhost(hostname)) {
  1396. if (debug)
  1397. fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for inband configuration\n",
  1398. program_name);
  1399. }
  1400. else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) {
  1401. workaround_flags |= IPMI_MONITORING_WORKAROUND_FLAGS_PROTOCOL_VERSION_1_5_NO_AUTH_CODE_CHECK;
  1402. if (debug)
  1403. fprintf(stderr, "%s: noauthcodecheck workaround flag enabled\n", program_name);
  1404. }
  1405. else {
  1406. if (debug)
  1407. fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for protocol version 2.0\n",
  1408. program_name);
  1409. }
  1410. continue;
  1411. }
  1412. else if(i < argc && strcmp("sdr-cache-dir", argv[i]) == 0) {
  1413. sdr_cache_directory = argv[++i];
  1414. if(debug)
  1415. fprintf(stderr, "%s: SDR cache directory set to '%s'\n", program_name, sdr_cache_directory);
  1416. continue;
  1417. }
  1418. else if(i < argc && strcmp("sensor-config-file", argv[i]) == 0) {
  1419. sensor_config_file = argv[++i];
  1420. if(debug) fprintf(stderr, "%s: sensor config file set to '%s'\n", program_name, sensor_config_file);
  1421. continue;
  1422. }
  1423. else if(i < argc && strcmp("sel-config-file", argv[i]) == 0) {
  1424. sel_config_file = argv[++i];
  1425. if(debug) fprintf(stderr, "%s: sel config file set to '%s'\n", program_name, sel_config_file);
  1426. continue;
  1427. }
  1428. else if(i < argc && strcmp("ignore", argv[i]) == 0) {
  1429. excluded_record_ids_parse(argv[++i], debug);
  1430. continue;
  1431. }
  1432. else if(i < argc && strcmp("ignore-status", argv[i]) == 0) {
  1433. excluded_status_record_ids_parse(argv[++i], debug);
  1434. continue;
  1435. }
  1436. collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]);
  1437. }
  1438. errno = 0;
  1439. if(freq_s && freq_s < update_every)
  1440. collector_error("%s(): update frequency %d seconds is too small for IPMI. Using %d.",
  1441. __FUNCTION__, freq_s, update_every);
  1442. update_every = freq_s = MAX(freq_s, update_every);
  1443. update_every_sel = MAX(update_every, update_every_sel);
  1444. // ------------------------------------------------------------------------
  1445. // initialize IPMI
  1446. if(debug) {
  1447. fprintf(stderr, "%s: calling ipmi_monitoring_init()\n", program_name);
  1448. ipmimonitoring_init_flags |= IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS;
  1449. }
  1450. int rc;
  1451. if(ipmi_monitoring_init(ipmimonitoring_init_flags, &rc) < 0)
  1452. fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(rc));
  1453. // ------------------------------------------------------------------------
  1454. // create the data collection threads
  1455. struct ipmi_collection_thread sensors_data = {
  1456. .type = IPMI_COLLECT_TYPE_SENSORS,
  1457. .freq_s = update_every,
  1458. .spinlock = NETDATA_SPINLOCK_INITIALIZER,
  1459. .debug = debug,
  1460. .state = {
  1461. .debug = debug,
  1462. .sensors = {
  1463. .status = ICS_INIT,
  1464. .last_iteration_ut = now_monotonic_usec(),
  1465. .freq_ut = update_every * USEC_PER_SEC,
  1466. .priority = IPMI_SENSORS_DASHBOARD_PRIORITY,
  1467. .dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE|DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct sensor)),
  1468. },
  1469. },
  1470. }, sel_data = {
  1471. .type = IPMI_COLLECT_TYPE_SEL,
  1472. .freq_s = update_every_sel,
  1473. .spinlock = NETDATA_SPINLOCK_INITIALIZER,
  1474. .debug = debug,
  1475. .state = {
  1476. .debug = debug,
  1477. .sel = {
  1478. .status = ICS_INIT,
  1479. .last_iteration_ut = now_monotonic_usec(),
  1480. .freq_ut = update_every_sel * USEC_PER_SEC,
  1481. .priority = IPMI_SEL_DASHBOARD_PRIORITY,
  1482. },
  1483. },
  1484. };
  1485. netdata_thread_t sensors_thread = 0, sel_thread = 0;
  1486. netdata_thread_create(&sensors_thread, "IPMI[sensors]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sensors_data);
  1487. if(netdata_do_sel)
  1488. netdata_thread_create(&sel_thread, "IPMI[sel]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sel_data);
  1489. // ------------------------------------------------------------------------
  1490. // the main loop
  1491. if(debug) fprintf(stderr, "%s: starting data collection\n", program_name);
  1492. time_t started_t = now_monotonic_sec();
  1493. size_t iteration = 0;
  1494. usec_t step = 100 * USEC_PER_MS;
  1495. bool global_chart_created = false;
  1496. bool tty = isatty(fileno(stderr)) == 1;
  1497. heartbeat_t hb;
  1498. heartbeat_init(&hb);
  1499. for(iteration = 0; 1 ; iteration++) {
  1500. usec_t dt = heartbeat_next(&hb, step);
  1501. if (!tty) {
  1502. fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs()
  1503. fflush(stdout);
  1504. }
  1505. struct netdata_ipmi_state state = {0 };
  1506. spinlock_lock(&sensors_data.spinlock);
  1507. state.sensors = sensors_data.state.sensors;
  1508. spinlock_unlock(&sensors_data.spinlock);
  1509. spinlock_lock(&sel_data.spinlock);
  1510. state.sel = sel_data.state.sel;
  1511. spinlock_unlock(&sel_data.spinlock);
  1512. switch(state.sensors.status) {
  1513. case ICS_RUNNING:
  1514. step = update_every * USEC_PER_SEC;
  1515. if(state.sensors.last_iteration_ut < now_monotonic_usec() - IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS * USEC_PER_SEC) {
  1516. collector_error("%s(): sensors have not be collected for %zu seconds. Exiting to restart.",
  1517. __FUNCTION__, (size_t)((now_monotonic_usec() - state.sensors.last_iteration_ut) / USEC_PER_SEC));
  1518. fprintf(stdout, "EXIT\n");
  1519. fflush(stdout);
  1520. exit(0);
  1521. }
  1522. break;
  1523. case ICS_INIT:
  1524. continue;
  1525. case ICS_INIT_FAILED:
  1526. collector_error("%s(): sensors failed to initialize. Calling DISABLE.", __FUNCTION__);
  1527. fprintf(stdout, "DISABLE\n");
  1528. fflush(stdout);
  1529. exit(0);
  1530. case ICS_FAILED:
  1531. collector_error("%s(): sensors fails repeatedly to collect metrics. Exiting to restart.", __FUNCTION__);
  1532. fprintf(stdout, "EXIT\n");
  1533. fflush(stdout);
  1534. exit(0);
  1535. }
  1536. if(netdata_do_sel) {
  1537. switch (state.sensors.status) {
  1538. case ICS_RUNNING:
  1539. case ICS_INIT:
  1540. break;
  1541. case ICS_INIT_FAILED:
  1542. case ICS_FAILED:
  1543. collector_error("%s(): SEL fails to collect events. Disabling SEL collection.", __FUNCTION__);
  1544. netdata_do_sel = false;
  1545. break;
  1546. }
  1547. }
  1548. if(unlikely(debug))
  1549. fprintf(stderr, "%s: calling send_ipmi_sensor_metrics_to_netdata()\n", program_name);
  1550. state.updates.now_ut = now_monotonic_usec();
  1551. send_ipmi_sensor_metrics_to_netdata(&state);
  1552. if(netdata_do_sel)
  1553. send_ipmi_sel_metrics_to_netdata(&state);
  1554. if(unlikely(debug))
  1555. fprintf(stderr, "%s: iteration %zu, dt %llu usec, sensors ever collected %zu, sensors last collected %zu \n"
  1556. , program_name
  1557. , iteration
  1558. , dt
  1559. , dictionary_entries(state.sensors.dict)
  1560. , state.sensors.collected
  1561. );
  1562. if (!global_chart_created) {
  1563. global_chart_created = true;
  1564. fprintf(stdout,
  1565. "CHART netdata.freeipmi_availability_status '' 'Plugin availability status' 'status' "
  1566. "plugins netdata.plugin_availability_status line 146000 %d '' '%s' '%s'\n"
  1567. "DIMENSION available '' absolute 1 1\n",
  1568. update_every, program_name, "");
  1569. }
  1570. fprintf(stdout,
  1571. "BEGIN netdata.freeipmi_availability_status\n"
  1572. "SET available = 1\n"
  1573. "END\n");
  1574. // restart check (14400 seconds)
  1575. if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) {
  1576. collector_info("%s(): reached my lifetime expectancy. Exiting to restart.", __FUNCTION__);
  1577. fprintf(stdout, "EXIT\n");
  1578. fflush(stdout);
  1579. exit(0);
  1580. }
  1581. fflush(stdout);
  1582. }
  1583. }