freeipmi_plugin.c 81 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097
  1. // SPDX-License-Identifier: GPL-3.0-or-later
  2. /*
  3. * netdata freeipmi.plugin
  4. * Copyright (C) 2023 Netdata Inc.
  5. * GPL v3+
  6. *
  7. * Based on:
  8. * ipmimonitoring-sensors.c,v 1.51 2016/11/02 23:46:24 chu11 Exp
  9. * ipmimonitoring-sel.c,v 1.51 2016/11/02 23:46:24 chu11 Exp
  10. *
  11. * Copyright (C) 2007-2015 Lawrence Livermore National Security, LLC.
  12. * Copyright (C) 2006-2007 The Regents of the University of California.
  13. * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
  14. * Written by Albert Chu <chu11@llnl.gov>
  15. * UCRL-CODE-222073
  16. */
  17. // ----------------------------------------------------------------------------
  18. // BEGIN NETDATA CODE
  19. // #define NETDATA_TIMING_REPORT 1
  20. #include "libnetdata/libnetdata.h"
  21. #include "libnetdata/required_dummies.h"
// Announce the "ipmi-sensors" function on stdout.
// Expands to a single fprintf() that writes one line made of
// PLUGINSD_KEYWORD_FUNCTION, the word GLOBAL, the quoted function name, the
// number 5, the quoted help text, the quoted fields "top" and "any", and the
// number 100.
// NOTE(review): 5 looks like a timeout in seconds and 100 like a priority -
// confirm against the plugins.d protocol definition.
#define FREEIPMI_GLOBAL_FUNCTION_SENSORS() do { \
        fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"ipmi-sensors\" %d \"%s\" \"top\" \"any\" %d\n", \
                5, "Displays current sensor state and readings", 100); \
    } while(0)
// component names, based on our patterns
// (these are the strings stored into a sensor's "component" by
// netdata_ipmi_get_sensor_type_string())
#define NETDATA_SENSOR_COMPONENT_MEMORY_MODULE "Memory Module"
#define NETDATA_SENSOR_COMPONENT_MEMORY "Memory"
#define NETDATA_SENSOR_COMPONENT_PROCESSOR "Processor"
#define NETDATA_SENSOR_COMPONENT_IPU "Image Processor"
#define NETDATA_SENSOR_COMPONENT_STORAGE "Storage"
#define NETDATA_SENSOR_COMPONENT_MOTHERBOARD "Motherboard"
#define NETDATA_SENSOR_COMPONENT_NETWORK "Network"
#define NETDATA_SENSOR_COMPONENT_POWER_SUPPLY "Power Supply"
#define NETDATA_SENSOR_COMPONENT_SYSTEM "System"
#define NETDATA_SENSOR_COMPONENT_PERIPHERAL "Peripheral"

// netdata plugin defaults
#define SENSORS_DICT_KEY_SIZE 2048 // the max size of the key for the dictionary of sensors
#define SPEED_TEST_ITERATIONS 5 // how many times to repeat data collection to decide latency
#define IPMI_SENSORS_DASHBOARD_PRIORITY 90000 // the priority of the sensors charts on the dashboard
#define IPMI_SEL_DASHBOARD_PRIORITY 99000 // the priority of the SEL events chart on the dashboard
#define IPMI_SENSORS_MIN_UPDATE_EVERY 5 // the minimum data collection frequency for sensors
#define IPMI_SEL_MIN_UPDATE_EVERY 30 // the minimum data collection frequency for SEL events
#define IPMI_ENABLE_SEL_BY_DEFAULT true // true/false, to enable/disable SEL by default
#define IPMI_RESTART_EVERY_SECONDS 14400 // restart the plugin every this many seconds
                                         // this is to prevent possible bugs/leaks in ipmi libraries
#define IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS (10 * 60) // stale data collection detection time
// forward definition of functions and structures

// the plugin state; only pointers to it are needed before its definition
struct netdata_ipmi_state;

// Receives the values of one sensor.
// netdata_read_ipmi_sensors() calls it once per sensor it iterates, passing
// the values it read from the library plus the plugin state.
static void netdata_update_ipmi_sensor_reading(
          int record_id
        , int sensor_number
        , int sensor_type
        , int sensor_state
        , int sensor_units
        , int sensor_reading_type
        , char *sensor_name
        , void *sensor_reading
        , int event_reading_type_code
        , int sensor_bitmask_type
        , int sensor_bitmask
        , char **sensor_bitmask_strings
        , struct netdata_ipmi_state *state
);

// Receives the number of SEL events.
// netdata_get_ipmi_sel_events_count() calls it with the (non-negative) count
// returned by ipmi_monitoring_sel_by_record_id().
static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events);

// END NETDATA CODE
// ----------------------------------------------------------------------------
  68. #include <stdio.h>
  69. #include <stdlib.h>
  70. #include <stdint.h>
  71. #include <string.h>
  72. #include <assert.h>
  73. #include <errno.h>
  74. #include <unistd.h>
  75. #include <sys/time.h>
  76. #include <ipmi_monitoring.h>
  77. #include <ipmi_monitoring_bitmasks.h>
  78. #include <ipmi_monitoring_offsets.h>
/* Communication Configuration - Initialize accordingly */

// NOTE(review): presumably serializes writes to stdout between threads and
// signals the function-handling code to stop - both are used outside this
// view, confirm there.
static netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER;
static bool function_plugin_should_exit = false;

int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency
int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events

/* Hostname, NULL for In-band communication, non-null for a hostname */
char *hostname = NULL;

/* In-band Communication Configuration */
int driver_type = -1; // IPMI_MONITORING_DRIVER_TYPE_KCS, etc. or -1 for default
int disable_auto_probe = 0; /* probe for in-band device */
unsigned int driver_address = 0; /* not used if probing */
unsigned int register_spacing = 0; /* not used if probing */
char *driver_device = NULL; /* not used if probing */

/* Out-of-band Communication Configuration */
int protocol_version = -1; // IPMI_MONITORING_PROTOCOL_VERSION_1_5, etc. or -1 for default
char *username = "";
char *password = "";
unsigned char *k_g = NULL;
unsigned int k_g_len = 0;
int privilege_level = -1; // IPMI_MONITORING_PRIVILEGE_LEVEL_USER, etc. or -1 for default
int authentication_type = -1; // IPMI_MONITORING_AUTHENTICATION_TYPE_MD5, etc. or -1 for default
int cipher_suite_id = -1; /* 0 or -1 for default */
int session_timeout = 0; /* 0 for default */
int retransmission_timeout = 0; /* 0 for default */

/* Workarounds - specify workaround flags if necessary */
unsigned int workaround_flags = 0;

/* Set to an appropriate alternate if desired */
// the directory and file-name formats are handed to the library by
// netdata_read_ipmi_sensors() and netdata_get_ipmi_sel_events_count();
// the two config files may stay NULL
char *sdr_cache_directory = "/tmp";
char *sdr_sensors_cache_format = ".netdata-freeipmi-sensors-%H-on-%L.sdr";
char *sdr_sel_cache_format = ".netdata-freeipmi-sel-%H-on-%L.sdr";
char *sensor_config_file = NULL;
char *sel_config_file = NULL;

// controlled via command line options
// when remove_reread_sdr_after_first_use is true, the readers clear the
// REREAD_SDR_CACHE bit from the two flag sets below after their first run
unsigned int global_sel_flags = IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE;
unsigned int global_sensor_reading_flags = IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING|IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE;
bool remove_reread_sdr_after_first_use = true;

/* Initialization flags
 *
 * Most commonly bitwise OR IPMI_MONITORING_FLAGS_DEBUG and/or
 * IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS for extra debugging
 * information.
 */
unsigned int ipmimonitoring_init_flags = 0;
  122. // ----------------------------------------------------------------------------
  123. // functions common to sensors and SEL
  124. static void initialize_ipmi_config (struct ipmi_monitoring_ipmi_config *ipmi_config) {
  125. fatal_assert(ipmi_config);
  126. ipmi_config->driver_type = driver_type;
  127. ipmi_config->disable_auto_probe = disable_auto_probe;
  128. ipmi_config->driver_address = driver_address;
  129. ipmi_config->register_spacing = register_spacing;
  130. ipmi_config->driver_device = driver_device;
  131. ipmi_config->protocol_version = protocol_version;
  132. ipmi_config->username = username;
  133. ipmi_config->password = password;
  134. ipmi_config->k_g = k_g;
  135. ipmi_config->k_g_len = k_g_len;
  136. ipmi_config->privilege_level = privilege_level;
  137. ipmi_config->authentication_type = authentication_type;
  138. ipmi_config->cipher_suite_id = cipher_suite_id;
  139. ipmi_config->session_timeout_len = session_timeout;
  140. ipmi_config->retransmission_timeout_len = retransmission_timeout;
  141. ipmi_config->workaround_flags = workaround_flags;
  142. }
  143. static const char *netdata_ipmi_get_sensor_type_string (int sensor_type, const char **component) {
  144. switch (sensor_type) {
  145. case IPMI_MONITORING_SENSOR_TYPE_RESERVED:
  146. return ("Reserved");
  147. case IPMI_MONITORING_SENSOR_TYPE_TEMPERATURE:
  148. return ("Temperature");
  149. case IPMI_MONITORING_SENSOR_TYPE_VOLTAGE:
  150. return ("Voltage");
  151. case IPMI_MONITORING_SENSOR_TYPE_CURRENT:
  152. return ("Current");
  153. case IPMI_MONITORING_SENSOR_TYPE_FAN:
  154. return ("Fan");
  155. case IPMI_MONITORING_SENSOR_TYPE_PHYSICAL_SECURITY:
  156. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  157. return ("Physical Security");
  158. case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_SECURITY_VIOLATION_ATTEMPT:
  159. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  160. return ("Platform Security Violation Attempt");
  161. case IPMI_MONITORING_SENSOR_TYPE_PROCESSOR:
  162. *component = NETDATA_SENSOR_COMPONENT_PROCESSOR;
  163. return ("Processor");
  164. case IPMI_MONITORING_SENSOR_TYPE_POWER_SUPPLY:
  165. *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY;
  166. return ("Power Supply");
  167. case IPMI_MONITORING_SENSOR_TYPE_POWER_UNIT:
  168. *component = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY;
  169. return ("Power Unit");
  170. case IPMI_MONITORING_SENSOR_TYPE_COOLING_DEVICE:
  171. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  172. return ("Cooling Device");
  173. case IPMI_MONITORING_SENSOR_TYPE_OTHER_UNITS_BASED_SENSOR:
  174. return ("Other Units Based Sensor");
  175. case IPMI_MONITORING_SENSOR_TYPE_MEMORY:
  176. *component = NETDATA_SENSOR_COMPONENT_MEMORY;
  177. return ("Memory");
  178. case IPMI_MONITORING_SENSOR_TYPE_DRIVE_SLOT:
  179. *component = NETDATA_SENSOR_COMPONENT_STORAGE;
  180. return ("Drive Slot");
  181. case IPMI_MONITORING_SENSOR_TYPE_POST_MEMORY_RESIZE:
  182. *component = NETDATA_SENSOR_COMPONENT_MEMORY;
  183. return ("POST Memory Resize");
  184. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_FIRMWARE_PROGRESS:
  185. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  186. return ("System Firmware Progress");
  187. case IPMI_MONITORING_SENSOR_TYPE_EVENT_LOGGING_DISABLED:
  188. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  189. return ("Event Logging Disabled");
  190. case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG1:
  191. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  192. return ("Watchdog 1");
  193. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_EVENT:
  194. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  195. return ("System Event");
  196. case IPMI_MONITORING_SENSOR_TYPE_CRITICAL_INTERRUPT:
  197. return ("Critical Interrupt");
  198. case IPMI_MONITORING_SENSOR_TYPE_BUTTON_SWITCH:
  199. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  200. return ("Button/Switch");
  201. case IPMI_MONITORING_SENSOR_TYPE_MODULE_BOARD:
  202. return ("Module/Board");
  203. case IPMI_MONITORING_SENSOR_TYPE_MICROCONTROLLER_COPROCESSOR:
  204. *component = NETDATA_SENSOR_COMPONENT_PROCESSOR;
  205. return ("Microcontroller/Coprocessor");
  206. case IPMI_MONITORING_SENSOR_TYPE_ADD_IN_CARD:
  207. return ("Add In Card");
  208. case IPMI_MONITORING_SENSOR_TYPE_CHASSIS:
  209. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  210. return ("Chassis");
  211. case IPMI_MONITORING_SENSOR_TYPE_CHIP_SET:
  212. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  213. return ("Chip Set");
  214. case IPMI_MONITORING_SENSOR_TYPE_OTHER_FRU:
  215. return ("Other Fru");
  216. case IPMI_MONITORING_SENSOR_TYPE_CABLE_INTERCONNECT:
  217. return ("Cable/Interconnect");
  218. case IPMI_MONITORING_SENSOR_TYPE_TERMINATOR:
  219. return ("Terminator");
  220. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_BOOT_INITIATED:
  221. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  222. return ("System Boot Initiated");
  223. case IPMI_MONITORING_SENSOR_TYPE_BOOT_ERROR:
  224. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  225. return ("Boot Error");
  226. case IPMI_MONITORING_SENSOR_TYPE_OS_BOOT:
  227. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  228. return ("OS Boot");
  229. case IPMI_MONITORING_SENSOR_TYPE_OS_CRITICAL_STOP:
  230. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  231. return ("OS Critical Stop");
  232. case IPMI_MONITORING_SENSOR_TYPE_SLOT_CONNECTOR:
  233. return ("Slot/Connector");
  234. case IPMI_MONITORING_SENSOR_TYPE_SYSTEM_ACPI_POWER_STATE:
  235. return ("System ACPI Power State");
  236. case IPMI_MONITORING_SENSOR_TYPE_WATCHDOG2:
  237. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  238. return ("Watchdog 2");
  239. case IPMI_MONITORING_SENSOR_TYPE_PLATFORM_ALERT:
  240. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  241. return ("Platform Alert");
  242. case IPMI_MONITORING_SENSOR_TYPE_ENTITY_PRESENCE:
  243. return ("Entity Presence");
  244. case IPMI_MONITORING_SENSOR_TYPE_MONITOR_ASIC_IC:
  245. return ("Monitor ASIC/IC");
  246. case IPMI_MONITORING_SENSOR_TYPE_LAN:
  247. *component = NETDATA_SENSOR_COMPONENT_NETWORK;
  248. return ("LAN");
  249. case IPMI_MONITORING_SENSOR_TYPE_MANAGEMENT_SUBSYSTEM_HEALTH:
  250. *component = NETDATA_SENSOR_COMPONENT_SYSTEM;
  251. return ("Management Subsystem Health");
  252. case IPMI_MONITORING_SENSOR_TYPE_BATTERY:
  253. return ("Battery");
  254. case IPMI_MONITORING_SENSOR_TYPE_SESSION_AUDIT:
  255. return ("Session Audit");
  256. case IPMI_MONITORING_SENSOR_TYPE_VERSION_CHANGE:
  257. return ("Version Change");
  258. case IPMI_MONITORING_SENSOR_TYPE_FRU_STATE:
  259. return ("FRU State");
  260. case IPMI_MONITORING_SENSOR_TYPE_UNKNOWN:
  261. return ("Unknown");
  262. default:
  263. if(sensor_type >= IPMI_MONITORING_SENSOR_TYPE_OEM_MIN && sensor_type <= IPMI_MONITORING_SENSOR_TYPE_OEM_MAX)
  264. return ("OEM");
  265. return ("Unrecognized");
  266. }
  267. }
/*
 * Read one integer attribute of the current sensor.
 *
 * Expands to: (var) = func(ctx). When the result is negative, the failure is
 * logged with collector_error() (name of the enclosing function, the name of
 * func, and the library's error message for ctx) and control jumps to the
 * `cleanup` label of the enclosing function. Otherwise timing_step() is
 * called with the identifier TIMING_STEP_FREEIPMI_READ_<var>, built by
 * pasting the variable name.
 *
 * Requirements on the caller: `var` must be a plain identifier (it is token
 * pasted) and a `cleanup:` label must exist in the calling function.
 */
#define netdata_ipmi_get_value_int(var, func, ctx) do { \
        (var) = func(ctx); \
        if( (var) < 0) { \
            collector_error("%s(): call to " #func " failed: %s", \
                            __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx)); \
            goto cleanup; \
        } \
        timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \
    } while(0)
/*
 * Read one pointer attribute of the current sensor.
 *
 * Expands to: (var) = func(ctx). When the result is NULL, the failure is
 * logged with collector_error() (name of the enclosing function, the name of
 * func, and the library's error message for ctx) and control jumps to the
 * `cleanup` label of the enclosing function. Otherwise timing_step() is
 * called with the identifier TIMING_STEP_FREEIPMI_READ_<var>, built by
 * pasting the variable name.
 *
 * Requirements on the caller: `var` must be a plain identifier (it is token
 * pasted) and a `cleanup:` label must exist in the calling function.
 */
#define netdata_ipmi_get_value_ptr(var, func, ctx) do { \
        (var) = func(ctx); \
        if(!(var)) { \
            collector_error("%s(): call to " #func " failed: %s", \
                            __FUNCTION__, ipmi_monitoring_ctx_errormsg(ctx)); \
            goto cleanup; \
        } \
        timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \
    } while(0)
/*
 * Read one attribute of the current sensor without validating the result.
 *
 * Expands to: (var) = func(ctx), followed by timing_step() with the
 * identifier TIMING_STEP_FREEIPMI_READ_<var> (built by pasting the variable
 * name, so `var` must be a plain identifier). Nothing is logged and no jump
 * is made, whatever func returns.
 */
#define netdata_ipmi_get_value_no_check(var, func, ctx) do { \
        (var) = func(ctx); \
        timing_step(TIMING_STEP_FREEIPMI_READ_ ## var); \
    } while(0)
  290. static int netdata_read_ipmi_sensors(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) {
  291. timing_init();
  292. ipmi_monitoring_ctx_t ctx = NULL;
  293. unsigned int sensor_reading_flags = global_sensor_reading_flags;
  294. int i;
  295. int sensor_count;
  296. int rv = -1;
  297. if (!(ctx = ipmi_monitoring_ctx_create ())) {
  298. collector_error("ipmi_monitoring_ctx_create()");
  299. goto cleanup;
  300. }
  301. timing_step(TIMING_STEP_FREEIPMI_CTX_CREATE);
  302. if (sdr_cache_directory) {
  303. if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) {
  304. collector_error("ipmi_monitoring_ctx_sdr_cache_directory(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  305. goto cleanup;
  306. }
  307. }
  308. if (sdr_sensors_cache_format) {
  309. if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sensors_cache_format) < 0) {
  310. collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  311. goto cleanup;
  312. }
  313. }
  314. timing_step(TIMING_STEP_FREEIPMI_DSR_CACHE_DIR);
  315. // Must call otherwise only default interpretations ever used
  316. // sensor_config_file can be NULL
  317. if (ipmi_monitoring_ctx_sensor_config_file (ctx, sensor_config_file) < 0) {
  318. collector_error( "ipmi_monitoring_ctx_sensor_config_file(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  319. goto cleanup;
  320. }
  321. timing_step(TIMING_STEP_FREEIPMI_SENSOR_CONFIG_FILE);
  322. if ((sensor_count = ipmi_monitoring_sensor_readings_by_record_id (ctx,
  323. hostname,
  324. ipmi_config,
  325. sensor_reading_flags,
  326. NULL,
  327. 0,
  328. NULL,
  329. NULL)) < 0) {
  330. collector_error( "ipmi_monitoring_sensor_readings_by_record_id(): %s",
  331. ipmi_monitoring_ctx_errormsg (ctx));
  332. goto cleanup;
  333. }
  334. timing_step(TIMING_STEP_FREEIPMI_SENSOR_READINGS_BY_X);
  335. for (i = 0; i < sensor_count; i++, ipmi_monitoring_sensor_iterator_next (ctx)) {
  336. int record_id, sensor_number, sensor_type, sensor_state, sensor_units,
  337. sensor_bitmask_type, sensor_bitmask, event_reading_type_code, sensor_reading_type;
  338. char **sensor_bitmask_strings = NULL;
  339. char *sensor_name = NULL;
  340. void *sensor_reading;
  341. netdata_ipmi_get_value_int(record_id, ipmi_monitoring_sensor_read_record_id, ctx);
  342. netdata_ipmi_get_value_int(sensor_number, ipmi_monitoring_sensor_read_sensor_number, ctx);
  343. netdata_ipmi_get_value_int(sensor_type, ipmi_monitoring_sensor_read_sensor_type, ctx);
  344. netdata_ipmi_get_value_ptr(sensor_name, ipmi_monitoring_sensor_read_sensor_name, ctx);
  345. netdata_ipmi_get_value_int(sensor_state, ipmi_monitoring_sensor_read_sensor_state, ctx);
  346. netdata_ipmi_get_value_int(sensor_units, ipmi_monitoring_sensor_read_sensor_units, ctx);
  347. netdata_ipmi_get_value_int(sensor_bitmask_type, ipmi_monitoring_sensor_read_sensor_bitmask_type, ctx);
  348. netdata_ipmi_get_value_int(sensor_bitmask, ipmi_monitoring_sensor_read_sensor_bitmask, ctx);
  349. // it's ok for this to be NULL, i.e. sensor_bitmask == IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN
  350. netdata_ipmi_get_value_no_check(sensor_bitmask_strings, ipmi_monitoring_sensor_read_sensor_bitmask_strings, ctx);
  351. netdata_ipmi_get_value_int(sensor_reading_type, ipmi_monitoring_sensor_read_sensor_reading_type, ctx);
  352. // whatever we read from the sensor, it is ok
  353. netdata_ipmi_get_value_no_check(sensor_reading, ipmi_monitoring_sensor_read_sensor_reading, ctx);
  354. netdata_ipmi_get_value_int(event_reading_type_code, ipmi_monitoring_sensor_read_event_reading_type_code, ctx);
  355. netdata_update_ipmi_sensor_reading(
  356. record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type, sensor_name,
  357. sensor_reading, event_reading_type_code, sensor_bitmask_type, sensor_bitmask, sensor_bitmask_strings,
  358. state
  359. );
  360. #ifdef NETDATA_COMMENTED
  361. /* It is possible you may want to monitor specific event
  362. * conditions that may occur. If that is the case, you may want
  363. * to check out what specific bitmask type and bitmask events
  364. * occurred. See ipmi_monitoring_bitmasks.h for a list of
  365. * bitmasks and types.
  366. */
  367. if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN)
  368. printf (", %Xh", sensor_bitmask);
  369. else
  370. printf (", N/A");
  371. if (sensor_bitmask_type != IPMI_MONITORING_SENSOR_BITMASK_TYPE_UNKNOWN
  372. && sensor_bitmask_strings)
  373. {
  374. unsigned int i = 0;
  375. printf (",");
  376. while (sensor_bitmask_strings[i])
  377. {
  378. printf (" ");
  379. printf ("'%s'",
  380. sensor_bitmask_strings[i]);
  381. i++;
  382. }
  383. }
  384. else
  385. printf (", N/A");
  386. printf ("\n");
  387. #endif // NETDATA_COMMENTED
  388. }
  389. rv = 0;
  390. cleanup:
  391. if (ctx)
  392. ipmi_monitoring_ctx_destroy (ctx);
  393. timing_report();
  394. if(remove_reread_sdr_after_first_use)
  395. global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE);
  396. return (rv);
  397. }
  398. static int netdata_get_ipmi_sel_events_count(struct ipmi_monitoring_ipmi_config *ipmi_config, struct netdata_ipmi_state *state) {
  399. timing_init();
  400. ipmi_monitoring_ctx_t ctx = NULL;
  401. unsigned int sel_flags = global_sel_flags;
  402. int sel_count;
  403. int rv = -1;
  404. if (!(ctx = ipmi_monitoring_ctx_create ())) {
  405. collector_error("ipmi_monitoring_ctx_create()");
  406. goto cleanup;
  407. }
  408. if (sdr_cache_directory) {
  409. if (ipmi_monitoring_ctx_sdr_cache_directory (ctx, sdr_cache_directory) < 0) {
  410. collector_error( "ipmi_monitoring_ctx_sdr_cache_directory(): %s", ipmi_monitoring_ctx_errormsg (ctx));
  411. goto cleanup;
  412. }
  413. }
  414. if (sdr_sel_cache_format) {
  415. if (ipmi_monitoring_ctx_sdr_cache_filenames(ctx, sdr_sel_cache_format) < 0) {
  416. collector_error("ipmi_monitoring_ctx_sdr_cache_filenames(): %s\n", ipmi_monitoring_ctx_errormsg (ctx));
  417. goto cleanup;
  418. }
  419. }
  420. // Must call otherwise only default interpretations ever used
  421. // sel_config_file can be NULL
  422. if (ipmi_monitoring_ctx_sel_config_file (ctx, sel_config_file) < 0) {
  423. collector_error( "ipmi_monitoring_ctx_sel_config_file(): %s",
  424. ipmi_monitoring_ctx_errormsg (ctx));
  425. goto cleanup;
  426. }
  427. if ((sel_count = ipmi_monitoring_sel_by_record_id (ctx,
  428. hostname,
  429. ipmi_config,
  430. sel_flags,
  431. NULL,
  432. 0,
  433. NULL,
  434. NULL)) < 0) {
  435. collector_error( "ipmi_monitoring_sel_by_record_id(): %s",
  436. ipmi_monitoring_ctx_errormsg (ctx));
  437. goto cleanup;
  438. }
  439. netdata_update_ipmi_sel_events_count(state, sel_count);
  440. rv = 0;
  441. cleanup:
  442. if (ctx)
  443. ipmi_monitoring_ctx_destroy (ctx);
  444. timing_report();
  445. if(remove_reread_sdr_after_first_use)
  446. global_sel_flags &= ~(IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE);
  447. return (rv);
  448. }
  449. // ----------------------------------------------------------------------------
  450. // copied from freeipmi codebase commit 8dea6dec4012d0899901e595f2c868a05e1cefed
  451. // added netdata_ in-front to not overwrite library functions
  452. // FROM: common/miscutil/network.c
  453. static int netdata_host_is_localhost (const char *host) {
  454. /* Ordered by my assumption of most popular */
  455. if (!strcasecmp (host, "localhost")
  456. || !strcmp (host, "127.0.0.1")
  457. || !strcasecmp (host, "ipv6-localhost")
  458. || !strcmp (host, "::1")
  459. || !strcasecmp (host, "ip6-localhost")
  460. || !strcmp (host, "0:0:0:0:0:0:0:1"))
  461. return (1);
  462. return (0);
  463. }
// FROM: common/parsecommon/parse-common.h
// Driver / protocol names accepted (ignoring case) by
// netdata_parse_outofband_driver_type() and netdata_parse_inband_driver_type().

// out-of-band: IPMI 1.5
#define IPMI_PARSE_DEVICE_LAN_STR "lan"

// out-of-band: IPMI 2.0, with the alternative spellings that are accepted
#define IPMI_PARSE_DEVICE_LAN_2_0_STR "lan_2_0"
#define IPMI_PARSE_DEVICE_LAN_2_0_STR2 "lan20"
#define IPMI_PARSE_DEVICE_LAN_2_0_STR3 "lan_20"
#define IPMI_PARSE_DEVICE_LAN_2_0_STR4 "lan2_0"
#define IPMI_PARSE_DEVICE_LAN_2_0_STR5 "lanplus"

// in-band drivers, with the alternative spellings that are accepted
#define IPMI_PARSE_DEVICE_KCS_STR "kcs"
#define IPMI_PARSE_DEVICE_SSIF_STR "ssif"
#define IPMI_PARSE_DEVICE_OPENIPMI_STR "openipmi"
#define IPMI_PARSE_DEVICE_OPENIPMI_STR2 "open"
#define IPMI_PARSE_DEVICE_SUNBMC_STR "sunbmc"
#define IPMI_PARSE_DEVICE_SUNBMC_STR2 "bmc"
#define IPMI_PARSE_DEVICE_INTELDCMI_STR "inteldcmi"
  478. // FROM: common/parsecommon/parse-common.c
  479. // changed the return values to match ipmi_monitoring.h
  480. static int netdata_parse_outofband_driver_type (const char *str) {
  481. if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_STR) == 0)
  482. return (IPMI_MONITORING_PROTOCOL_VERSION_1_5);
  483. /* support "lanplus" for those that might be used to ipmitool.
  484. * support typo variants to ease.
  485. */
  486. else if (strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR) == 0
  487. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR2) == 0
  488. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR3) == 0
  489. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR4) == 0
  490. || strcasecmp (str, IPMI_PARSE_DEVICE_LAN_2_0_STR5) == 0)
  491. return (IPMI_MONITORING_PROTOCOL_VERSION_2_0);
  492. return (-1);
  493. }
  494. // FROM: common/parsecommon/parse-common.c
  495. // changed the return values to match ipmi_monitoring.h
  496. static int netdata_parse_inband_driver_type (const char *str) {
  497. if (strcasecmp (str, IPMI_PARSE_DEVICE_KCS_STR) == 0)
  498. return (IPMI_MONITORING_DRIVER_TYPE_KCS);
  499. else if (strcasecmp (str, IPMI_PARSE_DEVICE_SSIF_STR) == 0)
  500. return (IPMI_MONITORING_DRIVER_TYPE_SSIF);
  501. /* support "open" for those that might be used to
  502. * ipmitool.
  503. */
  504. else if (strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR) == 0
  505. || strcasecmp (str, IPMI_PARSE_DEVICE_OPENIPMI_STR2) == 0)
  506. return (IPMI_MONITORING_DRIVER_TYPE_OPENIPMI);
  507. /* support "bmc" for those that might be used to
  508. * ipmitool.
  509. */
  510. else if (strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR) == 0
  511. || strcasecmp (str, IPMI_PARSE_DEVICE_SUNBMC_STR2) == 0)
  512. return (IPMI_MONITORING_DRIVER_TYPE_SUNBMC);
  513. #ifdef IPMI_MONITORING_DRIVER_TYPE_INTELDCMI
  514. else if (strcasecmp (str, IPMI_PARSE_DEVICE_INTELDCMI_STR) == 0)
  515. return (IPMI_MONITORING_DRIVER_TYPE_INTELDCMI);
  516. #endif // IPMI_MONITORING_DRIVER_TYPE_INTELDCMI
  517. return (-1);
  518. }
  519. // ----------------------------------------------------------------------------
  520. // BEGIN NETDATA CODE
// Bit flags selecting what a collection pass gathers.
// The two flags may be OR-ed together; netdata_collect_type_to_string()
// reports the combination as "sensors,sel".
typedef enum __attribute__((packed)) {
    IPMI_COLLECT_TYPE_SENSORS = (1 << 0), // read the sensors
    IPMI_COLLECT_TYPE_SEL = (1 << 1),     // count the SEL events
} IPMI_COLLECTION_TYPE;
// One sensor as stored in the sensors dictionary.
// Records are created and refreshed by netdata_update_ipmi_sensor_reading().
struct sensor {
    // values as reported for the sensor (IPMI_MONITORING_* constants)
    int sensor_type;
    int sensor_state;           // refreshed on every collection
    int sensor_units;
    char *sensor_name;          // heap copy made with strdupz()

    // selects which member of sensor_reading is valid
    int sensor_reading_type;
    union {
        uint8_t bool_value;
        uint32_t uint32_value;
        double double_value;
    } sensor_reading;

    // netdata provided
    // chart metadata, chosen from sensor_units when the record is created;
    // left NULL for units that have no metric chart
    const char *context;
    const char *title;
    const char *units;
    const char *family;
    const char *chart_type;
    const char *dimension;
    int priority;

    const char *type;           // sent as the 'type' chart label
    const char *component;      // sent as the 'component' chart label

    // 1000 for double readings, 1 for integer/bool readings;
    // used both as the DIMENSION divisor and to scale double values on SET
    int multiplier;

    bool do_metric;             // false when excluded or when units/reading type are unsupported
    bool do_state;              // false when the record id is excluded from status checks

    bool metric_chart_sent;     // CHART definition for the metric already printed
    bool state_chart_sent;      // CHART definition for the state already printed

    // collection timestamps, taken from state->sensors.now_ut
    usec_t last_collected_metric_ut;
    usec_t last_collected_state_ut;
};
// Life-cycle of a collector (sensors or SEL).
typedef enum __attribute__((packed)) {
    ICS_INIT,           // first enumerator (0): the value of a zero-initialized state
    ICS_INIT_FAILED,    // set by the collection thread when speed detection returned 0
    ICS_RUNNING,        // set by the collection thread once speed detection succeeded
    ICS_FAILED,         // set by the collection thread after more than 10 consecutive failures
} IPMI_COLLECTOR_STATUS;
// Collection state for sensors and SEL.
// Each collection thread embeds one, and there is also one global instance.
struct netdata_ipmi_state {
    bool debug;                     // print diagnostics to stderr

    struct {
        IPMI_COLLECTOR_STATUS status;
        usec_t last_iteration_ut;   // monotonic time of the last iteration; 0 on failure
        size_t collected;           // sensors seen during the current collection
        usec_t now_ut;              // monotonic time the current collection started
        usec_t freq_ut;             // collection frequency, in microseconds
        int priority;               // base chart priority
        DICTIONARY *dict;           // struct sensor records, keyed per sensor
    } sensors;

    struct {
        IPMI_COLLECTOR_STATUS status;
        usec_t last_iteration_ut;   // monotonic time of the last iteration; 0 on failure
        size_t events;              // number of SEL events found by the last collection
        usec_t now_ut;              // monotonic time the current collection started
        usec_t freq_ut;             // collection frequency, in microseconds
        int priority;               // base chart priority
    } sel;

    struct {
        // reference time used when deciding whether a sensor is still fresh
        usec_t now_ut;
    } updates;
};

// global state; note that several functions take a parameter also named 'state',
// which shadows this variable inside them
struct netdata_ipmi_state state = {0};
  584. // ----------------------------------------------------------------------------
  585. // excluded record ids maintenance (both for sensor data and state)
  586. static int *excluded_record_ids = NULL;
  587. size_t excluded_record_ids_length = 0;
  588. static void excluded_record_ids_parse(const char *s, bool debug) {
  589. if(!s) return;
  590. while(*s) {
  591. while(*s && !isdigit(*s)) s++;
  592. if(isdigit(*s)) {
  593. char *e;
  594. unsigned long n = strtoul(s, &e, 10);
  595. s = e;
  596. if(n != 0) {
  597. excluded_record_ids = reallocz(excluded_record_ids, (excluded_record_ids_length + 1) * sizeof(int));
  598. excluded_record_ids[excluded_record_ids_length++] = (int)n;
  599. }
  600. }
  601. }
  602. if(debug) {
  603. fprintf(stderr, "%s: excluded record ids:", program_name);
  604. size_t i;
  605. for(i = 0; i < excluded_record_ids_length; i++) {
  606. fprintf(stderr, " %d", excluded_record_ids[i]);
  607. }
  608. fprintf(stderr, "\n");
  609. }
  610. }
  611. static int *excluded_status_record_ids = NULL;
  612. size_t excluded_status_record_ids_length = 0;
  613. static void excluded_status_record_ids_parse(const char *s, bool debug) {
  614. if(!s) return;
  615. while(*s) {
  616. while(*s && !isdigit(*s)) s++;
  617. if(isdigit(*s)) {
  618. char *e;
  619. unsigned long n = strtoul(s, &e, 10);
  620. s = e;
  621. if(n != 0) {
  622. excluded_status_record_ids = reallocz(excluded_status_record_ids, (excluded_status_record_ids_length + 1) * sizeof(int));
  623. excluded_status_record_ids[excluded_status_record_ids_length++] = (int)n;
  624. }
  625. }
  626. }
  627. if(debug) {
  628. fprintf(stderr, "%s: excluded status record ids:", program_name);
  629. size_t i;
  630. for(i = 0; i < excluded_status_record_ids_length; i++) {
  631. fprintf(stderr, " %d", excluded_status_record_ids[i]);
  632. }
  633. fprintf(stderr, "\n");
  634. }
  635. }
  636. static int excluded_record_ids_check(int record_id) {
  637. size_t i;
  638. for(i = 0; i < excluded_record_ids_length; i++) {
  639. if(excluded_record_ids[i] == record_id)
  640. return 1;
  641. }
  642. return 0;
  643. }
  644. static int excluded_status_record_ids_check(int record_id) {
  645. size_t i;
  646. for(i = 0; i < excluded_status_record_ids_length; i++) {
  647. if(excluded_status_record_ids[i] == record_id)
  648. return 1;
  649. }
  650. return 0;
  651. }
  652. // ----------------------------------------------------------------------------
  653. // data collection functions
// Table mapping sensor names to the component label they are reported under.
// 'search' is a '|' separated list of patterns, 'label' is the component name.
// 'pattern' is not initialized here; netdata_sensor_name_to_component()
// compiles it from 'search' the first time the entry is evaluated.
struct {
    const char *search;
    SIMPLE_PATTERN *pattern;
    const char *label;
} sensors_component_patterns[] = {
        // The order is important!
        // They are evaluated top to bottom.
        // The first that matches is used.
        {
                .search = "*DIMM*|*_DIM*|*VTT*|*VDDQ*|*ECC*|*MEM*CRC*|*MEM*BD*",
                .label = NETDATA_SENSOR_COMPONENT_MEMORY_MODULE,
        },
        {
                .search = "*CPU*|SOC_*|*VDDCR*|P*_VDD*|*_DTS|*VCORE*|*PROC*",
                .label = NETDATA_SENSOR_COMPONENT_PROCESSOR,
        },
        {
                .search = "IPU*",
                .label = NETDATA_SENSOR_COMPONENT_IPU,
        },
        {
                .search = "M2_*|*SSD*|*HSC*|*HDD*|*NVME*",
                .label = NETDATA_SENSOR_COMPONENT_STORAGE,
        },
        {
                .search = "MB_*|*PCH*|*VBAT*|*I/O*BD*|*IO*BD*",
                .label = NETDATA_SENSOR_COMPONENT_MOTHERBOARD,
        },
        {
                .search = "Watchdog|SEL|SYS_*|*CHASSIS*",
                .label = NETDATA_SENSOR_COMPONENT_SYSTEM,
        },
        {
                .search = "PS*|P_*|*PSU*|*PWR*|*TERMV*|*D2D*",
                .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY,
        },

        // fallback components
        // (broader patterns, reached only when none of the entries above matched)
        {
                .search = "VR_P*|*VRMP*",
                .label = NETDATA_SENSOR_COMPONENT_PROCESSOR,
        },
        {
                .search = "*VSB*|*PS*",
                .label = NETDATA_SENSOR_COMPONENT_POWER_SUPPLY,
        },
        {
                .search = "*MEM*|*MEM*RAID*",
                .label = NETDATA_SENSOR_COMPONENT_MEMORY,
        },
        {
                .search = "*RAID*", // there is also "Memory RAID", so keep this after memory
                .label = NETDATA_SENSOR_COMPONENT_STORAGE,
        },
        {
                .search = "*PERIPHERAL*|*USB*",
                .label = NETDATA_SENSOR_COMPONENT_PERIPHERAL,
        },
        {
                .search = "*FAN*|*12V*|*VCC*|*PCI*|*CHIPSET*|*AMP*|*BD*",
                .label = NETDATA_SENSOR_COMPONENT_SYSTEM,
        },

        // terminator
        // (a NULL 'search' ends the scan in netdata_sensor_name_to_component())
        {
                .search = NULL,
                .label = NULL,
        }
};
  721. static const char *netdata_sensor_name_to_component(const char *sensor_name) {
  722. for(int i = 0; sensors_component_patterns[i].search ;i++) {
  723. if(!sensors_component_patterns[i].pattern)
  724. sensors_component_patterns[i].pattern = simple_pattern_create(sensors_component_patterns[i].search, "|", SIMPLE_PATTERN_EXACT, false);
  725. if(simple_pattern_matches(sensors_component_patterns[i].pattern, sensor_name))
  726. return sensors_component_patterns[i].label;
  727. }
  728. return "Other";
  729. }
  730. const char *netdata_collect_type_to_string(IPMI_COLLECTION_TYPE type) {
  731. if((type & (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL)) == (IPMI_COLLECT_TYPE_SENSORS|IPMI_COLLECT_TYPE_SEL))
  732. return "sensors,sel";
  733. if(type & IPMI_COLLECT_TYPE_SEL)
  734. return "sel";
  735. if(type & IPMI_COLLECT_TYPE_SENSORS)
  736. return "sensors";
  737. return "unknown";
  738. }
  739. static void netdata_sensor_set_value(struct sensor *sn, void *sensor_reading, struct netdata_ipmi_state *state __maybe_unused) {
  740. switch(sn->sensor_reading_type) {
  741. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
  742. sn->sensor_reading.bool_value = *((uint8_t *)sensor_reading);
  743. break;
  744. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
  745. sn->sensor_reading.uint32_value = *((uint32_t *)sensor_reading);
  746. break;
  747. case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
  748. sn->sensor_reading.double_value = *((double *)sensor_reading);
  749. break;
  750. default:
  751. case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN:
  752. sn->do_metric = false;
  753. break;
  754. }
  755. }
  756. static void netdata_update_ipmi_sensor_reading(
  757. int record_id
  758. , int sensor_number
  759. , int sensor_type
  760. , int sensor_state
  761. , int sensor_units
  762. , int sensor_reading_type
  763. , char *sensor_name
  764. , void *sensor_reading
  765. , int event_reading_type_code __maybe_unused
  766. , int sensor_bitmask_type __maybe_unused
  767. , int sensor_bitmask __maybe_unused
  768. , char **sensor_bitmask_strings __maybe_unused
  769. , struct netdata_ipmi_state *state
  770. ) {
  771. if(unlikely(sensor_state == IPMI_MONITORING_STATE_UNKNOWN &&
  772. sensor_type == IPMI_MONITORING_SENSOR_TYPE_UNKNOWN &&
  773. sensor_units == IPMI_MONITORING_SENSOR_UNITS_UNKNOWN &&
  774. sensor_reading_type == IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN &&
  775. (!sensor_name || !*sensor_name)))
  776. // we can't do anything about this sensor - everything is unknown
  777. return;
  778. if(unlikely(!sensor_name || !*sensor_name))
  779. sensor_name = "UNNAMED";
  780. state->sensors.collected++;
  781. char key[SENSORS_DICT_KEY_SIZE + 1];
  782. snprintfz(key, SENSORS_DICT_KEY_SIZE, "i%d_n%d_t%d_u%d_%s",
  783. record_id, sensor_number, sensor_reading_type, sensor_units, sensor_name);
  784. // find the sensor record
  785. const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(state->sensors.dict, key);
  786. if(likely(item)) {
  787. // recurring collection
  788. if(state->debug)
  789. fprintf(stderr, "%s: reusing sensor record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n",
  790. program_name, sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type);
  791. struct sensor *sn = dictionary_acquired_item_value(item);
  792. if(sensor_reading) {
  793. netdata_sensor_set_value(sn, sensor_reading, state);
  794. sn->last_collected_metric_ut = state->sensors.now_ut;
  795. }
  796. sn->sensor_state = sensor_state;
  797. sn->last_collected_state_ut = state->sensors.now_ut;
  798. dictionary_acquired_item_release(state->sensors.dict, item);
  799. return;
  800. }
  801. if(state->debug)
  802. fprintf(stderr, "Allocating new sensor data record for sensor '%s', id %d, number %d, type %d, state %d, units %d, reading_type %d\n",
  803. sensor_name, record_id, sensor_number, sensor_type, sensor_state, sensor_units, sensor_reading_type);
  804. // check if it is excluded
  805. bool excluded_metric = excluded_record_ids_check(record_id);
  806. bool excluded_state = excluded_status_record_ids_check(record_id);
  807. if(excluded_metric) {
  808. if(state->debug)
  809. fprintf(stderr, "Sensor '%s' is excluded by excluded_record_ids_check()\n", sensor_name);
  810. }
  811. if(excluded_state) {
  812. if(state->debug)
  813. fprintf(stderr, "Sensor '%s' is excluded for status check, by excluded_status_record_ids_check()\n", sensor_name);
  814. }
  815. struct sensor t = {
  816. .sensor_type = sensor_type,
  817. .sensor_state = sensor_state,
  818. .sensor_units = sensor_units,
  819. .sensor_reading_type = sensor_reading_type,
  820. .sensor_name = strdupz(sensor_name),
  821. .component = netdata_sensor_name_to_component(sensor_name),
  822. .do_state = !excluded_state,
  823. .do_metric = !excluded_metric,
  824. };
  825. t.type = netdata_ipmi_get_sensor_type_string(t.sensor_type, &t.component);
  826. switch(t.sensor_units) {
  827. case IPMI_MONITORING_SENSOR_UNITS_CELSIUS:
  828. t.dimension = "temperature";
  829. t.context = "ipmi.sensor_temperature_c";
  830. t.title = "IPMI Sensor Temperature Celsius";
  831. t.units = "Celsius";
  832. t.family = "temperatures";
  833. t.chart_type = "line";
  834. t.priority = state->sensors.priority + 10;
  835. break;
  836. case IPMI_MONITORING_SENSOR_UNITS_FAHRENHEIT:
  837. t.dimension = "temperature";
  838. t.context = "ipmi.sensor_temperature_f";
  839. t.title = "IPMI Sensor Temperature Fahrenheit";
  840. t.units = "Fahrenheit";
  841. t.family = "temperatures";
  842. t.chart_type = "line";
  843. t.priority = state->sensors.priority + 20;
  844. break;
  845. case IPMI_MONITORING_SENSOR_UNITS_VOLTS:
  846. t.dimension = "voltage";
  847. t.context = "ipmi.sensor_voltage";
  848. t.title = "IPMI Sensor Voltage";
  849. t.units = "Volts";
  850. t.family = "voltages";
  851. t.chart_type = "line";
  852. t.priority = state->sensors.priority + 30;
  853. break;
  854. case IPMI_MONITORING_SENSOR_UNITS_AMPS:
  855. t.dimension = "ampere";
  856. t.context = "ipmi.sensor_ampere";
  857. t.title = "IPMI Sensor Current";
  858. t.units = "Amps";
  859. t.family = "current";
  860. t.chart_type = "line";
  861. t.priority = state->sensors.priority + 40;
  862. break;
  863. case IPMI_MONITORING_SENSOR_UNITS_RPM:
  864. t.dimension = "rotations";
  865. t.context = "ipmi.sensor_fan_speed";
  866. t.title = "IPMI Sensor Fans Speed";
  867. t.units = "RPM";
  868. t.family = "fans";
  869. t.chart_type = "line";
  870. t.priority = state->sensors.priority + 50;
  871. break;
  872. case IPMI_MONITORING_SENSOR_UNITS_WATTS:
  873. t.dimension = "power";
  874. t.context = "ipmi.sensor_power";
  875. t.title = "IPMI Sensor Power";
  876. t.units = "Watts";
  877. t.family = "power";
  878. t.chart_type = "line";
  879. t.priority = state->sensors.priority + 60;
  880. break;
  881. case IPMI_MONITORING_SENSOR_UNITS_PERCENT:
  882. t.dimension = "percentage";
  883. t.context = "ipmi.sensor_reading_percent";
  884. t.title = "IPMI Sensor Reading Percentage";
  885. t.units = "%%";
  886. t.family = "other";
  887. t.chart_type = "line";
  888. t.priority = state->sensors.priority + 70;
  889. break;
  890. default:
  891. t.priority = state->sensors.priority + 80;
  892. t.do_metric = false;
  893. break;
  894. }
  895. switch(sensor_reading_type) {
  896. case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
  897. t.multiplier = 1000;
  898. break;
  899. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
  900. case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
  901. t.multiplier = 1;
  902. break;
  903. default:
  904. t.do_metric = false;
  905. break;
  906. }
  907. if(sensor_reading) {
  908. netdata_sensor_set_value(&t, sensor_reading, state);
  909. t.last_collected_metric_ut = state->sensors.now_ut;
  910. }
  911. t.last_collected_state_ut = state->sensors.now_ut;
  912. dictionary_set(state->sensors.dict, key, &t, sizeof(t));
  913. }
  914. static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *state, uint32_t events) {
  915. state->sel.events = events;
  916. }
  917. int netdata_ipmi_collect_data(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) {
  918. errno = 0;
  919. if(type & IPMI_COLLECT_TYPE_SENSORS) {
  920. state->sensors.collected = 0;
  921. state->sensors.now_ut = now_monotonic_usec();
  922. if (netdata_read_ipmi_sensors(ipmi_config, state) < 0) return -1;
  923. }
  924. if(type & IPMI_COLLECT_TYPE_SEL) {
  925. state->sel.events = 0;
  926. state->sel.now_ut = now_monotonic_usec();
  927. if(netdata_get_ipmi_sel_events_count(ipmi_config, state) < 0) return -2;
  928. }
  929. return 0;
  930. }
  931. int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_config, IPMI_COLLECTION_TYPE type, struct netdata_ipmi_state *state) {
  932. int i, checks = SPEED_TEST_ITERATIONS, successful = 0;
  933. usec_t total = 0;
  934. for(i = 0 ; i < checks ; i++) {
  935. if(unlikely(state->debug))
  936. fprintf(stderr, "%s: checking %s data collection speed iteration %d of %d\n",
  937. program_name, netdata_collect_type_to_string(type), i + 1, checks);
  938. // measure the time a data collection needs
  939. usec_t start = now_realtime_usec();
  940. if(netdata_ipmi_collect_data(ipmi_config, type, state) < 0)
  941. continue;
  942. usec_t end = now_realtime_usec();
  943. successful++;
  944. if(unlikely(state->debug))
  945. fprintf(stderr, "%s: %s data collection speed was %"PRIu64" usec\n",
  946. program_name, netdata_collect_type_to_string(type), end - start);
  947. // add it to our total
  948. total += end - start;
  949. // wait the same time
  950. // to avoid flooding the IPMI processor with requests
  951. sleep_usec(end - start);
  952. }
  953. if(!successful)
  954. return 0;
  955. // so, we assume it needed 2x the time
  956. // we find the average in microseconds
  957. // and we round-up to the closest second
  958. return (int)(( total * 2 / successful / USEC_PER_SEC ) + 1);
  959. }
  960. // ----------------------------------------------------------------------------
  961. // data collection threads
// Everything a data collection thread needs; passed to
// netdata_ipmi_collection_thread() as its argument.
struct ipmi_collection_thread {
    struct ipmi_monitoring_ipmi_config ipmi_config; // initialized by the thread itself
    int freq_s;                     // requested frequency; raised by the thread to the detected minimum
    bool debug;                     // print diagnostics to stderr
    IPMI_COLLECTION_TYPE type;      // what this thread collects
    SPINLOCK spinlock;              // held by the thread while it publishes a new 'state'
    struct netdata_ipmi_state state; // the latest collected state
};
/*
 * Data collection thread.
 *
 * ptr is a struct ipmi_collection_thread; it is also the return value.
 *
 * The thread initializes the IPMI configuration, detects the minimum
 * collection frequency and then collects data in a loop, publishing the
 * result of every iteration into t->state.
 *
 * It exits when the frequency detection fails (status ICS_INIT_FAILED) or
 * after more than 10 consecutive collection failures (status ICS_FAILED).
 */
void *netdata_ipmi_collection_thread(void *ptr) {
    struct ipmi_collection_thread *t = ptr;

    if(t->debug) fprintf(stderr, "%s: calling initialize_ipmi_config() for %s\n",
                         program_name, netdata_collect_type_to_string(t->type));

    initialize_ipmi_config(&t->ipmi_config);

    if(t->debug) fprintf(stderr, "%s: detecting IPMI minimum update frequency for %s...\n",
                         program_name, netdata_collect_type_to_string(t->type));

    // zero means that no test collection succeeded
    int freq_s = netdata_ipmi_detect_speed_secs(&t->ipmi_config, t->type, &t->state);
    if(!freq_s) {
        // NOTE(review): these status updates are made without taking t->spinlock,
        // unlike the state publication at the end of the loop below - confirm this is intended
        if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
            t->state.sensors.status = ICS_INIT_FAILED;
            t->state.sensors.last_iteration_ut = 0;
        }

        if(t->type & IPMI_COLLECT_TYPE_SEL) {
            t->state.sel.status = ICS_INIT_FAILED;
            t->state.sel.last_iteration_ut = 0;
        }

        return ptr;
    }
    else {
        if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
            t->state.sensors.status = ICS_RUNNING;
        }

        if(t->type & IPMI_COLLECT_TYPE_SEL) {
            t->state.sel.status = ICS_RUNNING;
        }
    }

    // never collect more frequently than the detected minimum
    t->freq_s = freq_s = MAX(t->freq_s, freq_s);

    if(t->debug) {
        fprintf(stderr, "%s: IPMI minimum update frequency of %s was calculated to %d seconds.\n",
                program_name, netdata_collect_type_to_string(t->type), t->freq_s);

        fprintf(stderr, "%s: starting data collection of %s\n",
                program_name, netdata_collect_type_to_string(t->type));
    }

    // 'failures' counts consecutive failures; it is reset on every success
    size_t iteration = 0, failures = 0;
    usec_t step = t->freq_s * USEC_PER_SEC;

    heartbeat_t hb;
    heartbeat_init(&hb);
    while(++iteration) {
        heartbeat_next(&hb, step);

        if(t->debug)
            fprintf(stderr, "%s: calling netdata_ipmi_collect_data() for %s\n",
                    program_name, netdata_collect_type_to_string(t->type));

        // collect into a private copy, so that the shared state
        // is replaced in one step, under the spinlock, below
        struct netdata_ipmi_state tmp_state = t->state;

        if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
            tmp_state.sensors.last_iteration_ut = now_monotonic_usec();
            tmp_state.sensors.freq_ut = t->freq_s * USEC_PER_SEC;
        }

        if(t->type & IPMI_COLLECT_TYPE_SEL) {
            tmp_state.sel.last_iteration_ut = now_monotonic_usec();
            tmp_state.sel.freq_ut = t->freq_s * USEC_PER_SEC;
        }

        if(netdata_ipmi_collect_data(&t->ipmi_config, t->type, &tmp_state) != 0)
            failures++;
        else
            failures = 0;

        if(failures > 10) {
            collector_error("%s() failed to collect %s data for %zu consecutive times, having made %zu iterations.",
                            __FUNCTION__, netdata_collect_type_to_string(t->type), failures, iteration);

            // give up: tmp_state is discarded and only the status is updated
            if(t->type & IPMI_COLLECT_TYPE_SENSORS) {
                t->state.sensors.status = ICS_FAILED;
                t->state.sensors.last_iteration_ut = 0;
            }

            if(t->type & IPMI_COLLECT_TYPE_SEL) {
                t->state.sel.status = ICS_FAILED;
                t->state.sel.last_iteration_ut = 0;
            }

            break;
        }

        // publish the new state
        // (this happens for failed iterations too, as long as we have not given up)
        spinlock_lock(&t->spinlock);
        t->state = tmp_state;
        spinlock_unlock(&t->spinlock);
    }

    return ptr;
}
  1045. // ----------------------------------------------------------------------------
  1046. // sending data to netdata
  1047. static inline bool is_sensor_updated(usec_t last_collected_ut, usec_t now_ut, usec_t freq) {
  1048. return (now_ut - last_collected_ut < freq * 2) ? true : false;
  1049. }
/*
 * Print the sensor charts and their values to stdout.
 *
 * For every sensor in state->sensors.dict, up to two charts are sent: one
 * with the reading (when do_metric is set) and one with the state (when
 * do_state is set). The CHART definition of each is sent only once. Values
 * are sent only for sensors collected recently enough, according to
 * is_sensor_updated() against state->updates.now_ut.
 *
 * stdout_mutex is held while printing.
 *
 * Returns the number of sensors for which at least one chart was updated,
 * or 0 when the sensors collector is not in ICS_RUNNING status.
 */
static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *state) {
    if(state->sensors.status != ICS_RUNNING) {
        if(unlikely(state->debug))
            fprintf(stderr, "%s: %s() sensors state is not RUNNING\n",
                    program_name, __FUNCTION__ );
        return 0;
    }

    size_t total_sensors_sent = 0;
    // local variable: the charts are defined with the sensors collection frequency
    int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC);
    struct sensor *sn;

    netdata_mutex_lock(&stdout_mutex);
    // generate the CHART/DIMENSION lines, if we have to
    // sn_dfe.name is used as the unique part of the chart ids
    // (presumably it is the dictionary key of the current item - TODO confirm)
    dfe_start_reentrant(state->sensors.dict, sn) {
        if(unlikely(!sn->do_metric && !sn->do_state))
            continue;

        bool did_metric = false, did_state = false;

        if(likely(sn->do_metric)) {
            if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut))) {
                // stale reading: send nothing for this chart
                if(unlikely(state->debug))
                    fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %"PRIu64", now %"PRIu64", freq %"PRIu64"\n",
                            program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut);
            }
            else {
                if (unlikely(!sn->metric_chart_sent)) {
                    sn->metric_chart_sent = true;

                    printf("CHART '%s_%s' '' '%s' '%s' '%s' '%s' '%s' %d %d '' '%s' '%s'\n",
                           sn->context, sn_dfe.name, sn->title, sn->units, sn->family, sn->context,
                           sn->chart_type, sn->priority + 1, update_every, program_name, "sensors");

                    printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name);
                    printf("CLABEL 'type' '%s' 1\n", sn->type);
                    printf("CLABEL 'component' '%s' 1\n", sn->component);
                    printf("CLABEL_COMMIT\n");

                    // the multiplier is the divisor of the dimension
                    printf("DIMENSION '%s' '' absolute 1 %d\n", sn->dimension, sn->multiplier);
                }

                printf("BEGIN '%s_%s'\n", sn->context, sn_dfe.name);

                switch (sn->sensor_reading_type) {
                    case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
                        printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.uint32_value
                        );
                        break;

                    case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
                        // sent as an integer, scaled by the multiplier
                        printf("SET '%s' = %lld\n", sn->dimension,
                               (long long int) (sn->sensor_reading.double_value * sn->multiplier)
                        );
                        break;

                    case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
                        printf("SET '%s' = %u\n", sn->dimension, sn->sensor_reading.bool_value
                        );
                        break;

                    default:
                    case IPMI_MONITORING_SENSOR_READING_TYPE_UNKNOWN:
                        // this should never happen, because do_metric is already cleared
                        // for these reading types when the sensor value is stored
                        sn->do_metric = false;
                        break;
                }
                printf("END\n");
                did_metric = true;
            }
        }

        if(likely(sn->do_state)) {
            if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut))) {
                // stale state: send nothing for this chart
                if (unlikely(state->debug))
                    fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %"PRIu64", now %"PRIu64", freq %"PRIu64"\n",
                            program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut);
            }
            else {
                if (unlikely(!sn->state_chart_sent)) {
                    sn->state_chart_sent = true;

                    printf("CHART 'ipmi.sensor_state_%s' '' 'IPMI Sensor State' 'state' 'states' 'ipmi.sensor_state' 'line' %d %d '' '%s' '%s'\n",
                           sn_dfe.name, sn->priority, update_every, program_name, "sensors");

                    printf("CLABEL 'sensor' '%s' 1\n", sn->sensor_name);
                    printf("CLABEL 'type' '%s' 1\n", sn->type);
                    printf("CLABEL 'component' '%s' 1\n", sn->component);
                    printf("CLABEL_COMMIT\n");

                    printf("DIMENSION 'nominal' '' absolute 1 1\n");
                    printf("DIMENSION 'warning' '' absolute 1 1\n");
                    printf("DIMENSION 'critical' '' absolute 1 1\n");
                    printf("DIMENSION 'unknown' '' absolute 1 1\n");
                }

                // one dimension per state: 1 for the current state, 0 for the others
                printf("BEGIN 'ipmi.sensor_state_%s'\n", sn_dfe.name);
                printf("SET 'nominal' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_NOMINAL ? 1LL : 0LL);
                printf("SET 'warning' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_WARNING ? 1LL : 0LL);
                printf("SET 'critical' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_CRITICAL ? 1LL : 0LL);
                printf("SET 'unknown' = %lld\n", sn->sensor_state == IPMI_MONITORING_STATE_UNKNOWN ? 1LL : 0LL);
                printf("END\n");
                did_state = true;
            }
        }

        // a sensor is counted once, even when both of its charts were updated
        if(likely(did_metric || did_state))
            total_sensors_sent++;
    }
    dfe_done(sn);

    netdata_mutex_unlock(&stdout_mutex);

    return total_sensors_sent;
}
  1145. static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) {
  1146. static bool sel_chart_generated = false;
  1147. netdata_mutex_lock(&stdout_mutex);
  1148. if(likely(state->sel.status == ICS_RUNNING)) {
  1149. if(unlikely(!sel_chart_generated)) {
  1150. sel_chart_generated = true;
  1151. printf("CHART ipmi.events '' 'IPMI Events' 'events' 'events' ipmi.sel area %d %d '' '%s' '%s'\n"
  1152. , state->sel.priority + 2
  1153. , (int)(state->sel.freq_ut / USEC_PER_SEC)
  1154. , program_name
  1155. , "sel"
  1156. );
  1157. printf("DIMENSION events '' absolute 1 1\n");
  1158. }
  1159. printf(
  1160. "BEGIN ipmi.events\n"
  1161. "SET events = %zu\n"
  1162. "END\n"
  1163. , state->sel.events
  1164. );
  1165. }
  1166. netdata_mutex_unlock(&stdout_mutex);
  1167. return state->sel.events;
  1168. }
  1169. // ----------------------------------------------------------------------------
  1170. static const char *get_sensor_state_string(struct sensor *sn) {
  1171. switch (sn->sensor_state) {
  1172. case IPMI_MONITORING_STATE_NOMINAL:
  1173. return "nominal";
  1174. case IPMI_MONITORING_STATE_WARNING:
  1175. return "warning";
  1176. case IPMI_MONITORING_STATE_CRITICAL:
  1177. return "critical";
  1178. default:
  1179. return "unknown";
  1180. }
  1181. }
  1182. static const char *get_sensor_function_priority(struct sensor *sn) {
  1183. switch (sn->sensor_state) {
  1184. case IPMI_MONITORING_STATE_WARNING:
  1185. return "warning";
  1186. case IPMI_MONITORING_STATE_CRITICAL:
  1187. return "critical";
  1188. default:
  1189. return "normal";
  1190. }
  1191. }
/*
 * Function handler: respond with a JSON table of all the sensors.
 *
 * The response has one row per sensor of the global state.sensors.dict
 * (sensors with both do_metric and do_state disabled are skipped), followed
 * by the columns definition, the default sort column and the charts.
 * It is sent to stdout with pluginsd_function_result_to_stdout(), using
 * 'transaction' to identify the request. All other parameters are unused.
 */
static void freeimi_function_sensors(const char *transaction, char *function __maybe_unused,
                                     usec_t *stop_monotonic_ut __maybe_unused, bool *cancelled __maybe_unused,
                                     BUFFER *payload __maybe_unused, const char *source __maybe_unused, void *data __maybe_unused) {
    // 'update_every' is not a local here; it is defined elsewhere in the file
    time_t expires = now_realtime_sec() + update_every;

    BUFFER *wb = buffer_create(4096, NULL);
    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS);
    buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
    buffer_json_member_add_string(wb, "type", "table");
    buffer_json_member_add_time_t(wb, "update_every", update_every);
    buffer_json_member_add_string(wb, "help", "View IPMI sensor readings and its state");
    buffer_json_member_add_array(wb, "data");

    // one row per sensor; the order of the items must match the columns defined below
    struct sensor *sn;
    dfe_start_reentrant(state.sensors.dict, sn) {
        if (unlikely(!sn->do_metric && !sn->do_state))
            continue;

        // the reading stays NAN for any reading type not listed below
        double reading = NAN;
        switch (sn->sensor_reading_type) {
            case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
                reading = (double)sn->sensor_reading.uint32_value;
                break;
            case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
                reading = (double)(sn->sensor_reading.double_value);
                break;
            case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
                reading = (double)sn->sensor_reading.bool_value;
                break;
        }

        buffer_json_add_array_item_array(wb);

        buffer_json_add_array_item_string(wb, sn->sensor_name);
        buffer_json_add_array_item_string(wb, sn->type);
        buffer_json_add_array_item_string(wb, sn->component);
        buffer_json_add_array_item_double(wb, reading);
        // NOTE(review): units is NULL for sensors whose units have no metric chart -
        // confirm that buffer_json_add_array_item_string() accepts NULL
        buffer_json_add_array_item_string(wb, sn->units);
        buffer_json_add_array_item_string(wb, get_sensor_state_string(sn));

        // the last item of the row is the rowOptions object
        buffer_json_add_array_item_object(wb);
        buffer_json_member_add_string(wb, "severity", get_sensor_function_priority(sn));
        buffer_json_object_close(wb);

        buffer_json_array_close(wb);
    }
    dfe_done(sn);

    buffer_json_array_close(wb); // data
    buffer_json_member_add_object(wb, "columns");
    {
        // field_id is the index of each column in the rows above
        size_t field_id = 0;

        buffer_rrdf_table_add_field(wb, field_id++, "Sensor", "Sensor Name",
                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH,
                                    NULL);
        buffer_rrdf_table_add_field(wb, field_id++, "Type", "Sensor Type",
                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
                                    NULL);
        buffer_rrdf_table_add_field(wb, field_id++, "Component", "Sensor Component",
                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
                                    NULL);
        buffer_rrdf_table_add_field(wb, field_id++, "Reading", "Sensor Current Reading",
                                    RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
                                    2, NULL, 0, RRDF_FIELD_SORT_DESCENDING, NULL,
                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
                                    RRDF_FIELD_OPTS_VISIBLE,
                                    NULL);
        buffer_rrdf_table_add_field(wb, field_id++, "Units", "Sensor Reading Units",
                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
                                    NULL);
        buffer_rrdf_table_add_field(wb, field_id++, "State", "Sensor State",
                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
                                    NULL);
        // matches the object with the "severity" added at the end of each row
        buffer_rrdf_table_add_field(
                wb, field_id++,
                "rowOptions", "rowOptions",
                RRDF_FIELD_TYPE_NONE,
                RRDR_FIELD_VISUAL_ROW_OPTIONS,
                RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
                RRDF_FIELD_SORT_FIXED,
                NULL,
                RRDF_FIELD_SUMMARY_COUNT,
                RRDF_FIELD_FILTER_NONE,
                RRDF_FIELD_OPTS_DUMMY,
                NULL);
    }

    buffer_json_object_close(wb); // columns
    buffer_json_member_add_string(wb, "default_sort_column", "Type");

    buffer_json_member_add_object(wb, "charts");
    {
        buffer_json_member_add_object(wb, "Sensors");
        {
            buffer_json_member_add_string(wb, "name", "Sensors");
            buffer_json_member_add_string(wb, "type", "stacked-bar");
            buffer_json_member_add_array(wb, "columns");
            {
                buffer_json_add_array_item_string(wb, "Sensor");
            }
            buffer_json_array_close(wb);
        }
        buffer_json_object_close(wb);
    }
    buffer_json_object_close(wb); // charts

    buffer_json_member_add_array(wb, "default_charts");
    {
        buffer_json_add_array_item_array(wb);
        buffer_json_add_array_item_string(wb, "Sensors");
        buffer_json_add_array_item_string(wb, "Component");
        buffer_json_array_close(wb);

        buffer_json_add_array_item_array(wb);
        buffer_json_add_array_item_string(wb, "Sensors");
        buffer_json_add_array_item_string(wb, "State");
        buffer_json_array_close(wb);
    }
    buffer_json_array_close(wb);

    // NOTE(review): the "expires" member of the payload is now + 1 second, while the
    // 'expires' passed to pluginsd_function_result_to_stdout() below is
    // now + update_every - confirm that the difference is intended
    buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
    buffer_json_finalize(wb);

    pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires, wb);

    buffer_free(wb);
}
  1319. // ----------------------------------------------------------------------------
  1320. // main, command line arguments parsing
  1321. static void plugin_exit(int code) {
  1322. fflush(stdout);
  1323. function_plugin_should_exit = true;
  1324. exit(code);
  1325. }
  1326. int main (int argc, char **argv) {
  1327. clocks_init();
  1328. nd_log_initialize_for_external_plugins("freeipmi.plugin");
  1329. netdata_threads_init_for_external_plugins(0); // set the default threads stack size here
  1330. bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT;
  1331. bool debug = false;
  1332. // ------------------------------------------------------------------------
  1333. // parse command line parameters
  1334. int i, freq_s = 0;
  1335. for(i = 1; i < argc ; i++) {
  1336. if(isdigit(*argv[i]) && !freq_s) {
  1337. int n = str2i(argv[i]);
  1338. if(n > 0 && n < 86400) {
  1339. freq_s = n;
  1340. continue;
  1341. }
  1342. }
  1343. else if(strcmp("version", argv[i]) == 0 || strcmp("-version", argv[i]) == 0 || strcmp("--version", argv[i]) == 0 || strcmp("-v", argv[i]) == 0 || strcmp("-V", argv[i]) == 0) {
  1344. printf("%s %s\n", program_name, VERSION);
  1345. exit(0);
  1346. }
  1347. else if(strcmp("debug", argv[i]) == 0) {
  1348. debug = true;
  1349. continue;
  1350. }
  1351. else if(strcmp("sel", argv[i]) == 0) {
  1352. netdata_do_sel = true;
  1353. continue;
  1354. }
  1355. else if(strcmp("no-sel", argv[i]) == 0) {
  1356. netdata_do_sel = false;
  1357. continue;
  1358. }
  1359. else if(strcmp("reread-sdr-cache", argv[i]) == 0) {
  1360. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_REREAD_SDR_CACHE;
  1361. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_REREAD_SDR_CACHE;
  1362. remove_reread_sdr_after_first_use = false;
  1363. if (debug) fprintf(stderr, "%s: reread-sdr-cache enabled for both sensors and SEL\n", program_name);
  1364. }
  1365. else if(strcmp("interpret-oem-data", argv[i]) == 0) {
  1366. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_INTERPRET_OEM_DATA;
  1367. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_INTERPRET_OEM_DATA;
  1368. if (debug) fprintf(stderr, "%s: interpret-oem-data enabled for both sensors and SEL\n", program_name);
  1369. }
  1370. else if(strcmp("assume-system-event-record", argv[i]) == 0) {
  1371. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ASSUME_SYSTEM_EVENT_RECORD;
  1372. if (debug) fprintf(stderr, "%s: assume-system-event-record enabled\n", program_name);
  1373. }
  1374. else if(strcmp("ignore-non-interpretable-sensors", argv[i]) == 0) {
  1375. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_NON_INTERPRETABLE_SENSORS;
  1376. if (debug) fprintf(stderr, "%s: ignore-non-interpretable-sensors enabled\n", program_name);
  1377. }
  1378. else if(strcmp("bridge-sensors", argv[i]) == 0) {
  1379. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_BRIDGE_SENSORS;
  1380. if (debug) fprintf(stderr, "%s: bridge-sensors enabled\n", program_name);
  1381. }
  1382. else if(strcmp("shared-sensors", argv[i]) == 0) {
  1383. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_SHARED_SENSORS;
  1384. if (debug) fprintf(stderr, "%s: shared-sensors enabled\n", program_name);
  1385. }
  1386. else if(strcmp("no-discrete-reading", argv[i]) == 0) {
  1387. global_sensor_reading_flags &= ~(IPMI_MONITORING_SENSOR_READING_FLAGS_DISCRETE_READING);
  1388. if (debug) fprintf(stderr, "%s: discrete-reading disabled\n", program_name);
  1389. }
  1390. else if(strcmp("ignore-scanning-disabled", argv[i]) == 0) {
  1391. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_IGNORE_SCANNING_DISABLED;
  1392. if (debug) fprintf(stderr, "%s: ignore-scanning-disabled enabled\n", program_name);
  1393. }
  1394. else if(strcmp("assume-bmc-owner", argv[i]) == 0) {
  1395. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ASSUME_BMC_OWNER;
  1396. if (debug) fprintf(stderr, "%s: assume-bmc-owner enabled\n", program_name);
  1397. }
  1398. #if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES)
  1399. else if(strcmp("entity-sensor-names", argv[i]) == 0) {
  1400. global_sel_flags |= IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES;
  1401. global_sensor_reading_flags |= IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES;
  1402. if (debug) fprintf(stderr, "%s: entity-sensor-names enabled for both sensors and SEL\n", program_name);
  1403. }
  1404. #endif
  1405. else if(strcmp("-h", argv[i]) == 0 || strcmp("--help", argv[i]) == 0) {
  1406. fprintf(stderr,
  1407. "\n"
  1408. " netdata %s %s\n"
  1409. " Copyright (C) 2023 Netdata Inc.\n"
  1410. " Released under GNU General Public License v3 or later.\n"
  1411. " All rights reserved.\n"
  1412. "\n"
  1413. " This program is a data collector plugin for netdata.\n"
  1414. "\n"
  1415. " Available command line options:\n"
  1416. "\n"
  1417. " SECONDS data collection frequency\n"
  1418. " minimum: %d\n"
  1419. "\n"
  1420. " debug enable verbose output\n"
  1421. " default: disabled\n"
  1422. "\n"
  1423. " sel\n"
  1424. " no-sel enable/disable SEL collection\n"
  1425. " default: %s\n"
  1426. "\n"
  1427. " reread-sdr-cache re-read SDR cache on every iteration\n"
  1428. " default: disabled\n"
  1429. "\n"
  1430. " interpret-oem-data attempt to parse OEM data\n"
  1431. " default: disabled\n"
  1432. "\n"
  1433. " assume-system-event-record \n"
  1434. " tread illegal SEL events records as normal\n"
  1435. " default: disabled\n"
  1436. "\n"
  1437. " ignore-non-interpretable-sensors \n"
  1438. " do not read sensors that cannot be interpreted\n"
  1439. " default: disabled\n"
  1440. "\n"
  1441. " bridge-sensors bridge sensors not owned by the BMC\n"
  1442. " default: disabled\n"
  1443. "\n"
  1444. " shared-sensors enable shared sensors, if found\n"
  1445. " default: disabled\n"
  1446. "\n"
  1447. " no-discrete-reading do not read sensors that their event/reading type code is invalid\n"
  1448. " default: enabled\n"
  1449. "\n"
  1450. " ignore-scanning-disabled \n"
  1451. " Ignore the scanning bit and read sensors no matter what\n"
  1452. " default: disabled\n"
  1453. "\n"
  1454. " assume-bmc-owner assume the BMC is the sensor owner no matter what\n"
  1455. " (usually bridging is required too)\n"
  1456. " default: disabled\n"
  1457. "\n"
  1458. #if defined(IPMI_MONITORING_SEL_FLAGS_ENTITY_SENSOR_NAMES) && defined(IPMI_MONITORING_SENSOR_READING_FLAGS_ENTITY_SENSOR_NAMES)
  1459. " entity-sensor-names sensor names prefixed with entity id and instance\n"
  1460. " default: disabled\n"
  1461. "\n"
  1462. #endif
  1463. " hostname HOST\n"
  1464. " username USER\n"
  1465. " password PASS connect to remote IPMI host\n"
  1466. " default: local IPMI processor\n"
  1467. "\n"
  1468. " no-auth-code-check\n"
  1469. " noauthcodecheck don't check the authentication codes returned\n"
  1470. "\n"
  1471. " driver-type IPMIDRIVER\n"
  1472. " Specify the driver type to use instead of doing an auto selection. \n"
  1473. " The currently available outofband drivers are LAN and LAN_2_0,\n"
  1474. " which perform IPMI 1.5 and IPMI 2.0 respectively. \n"
  1475. " The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.\n"
  1476. "\n"
  1477. " sdr-cache-dir PATH directory for SDR cache files\n"
  1478. " default: %s\n"
  1479. "\n"
  1480. " sensor-config-file FILE filename to read sensor configuration\n"
  1481. " default: %s\n"
  1482. "\n"
  1483. " sel-config-file FILE filename to read sel configuration\n"
  1484. " default: %s\n"
  1485. "\n"
  1486. " ignore N1,N2,N3,... sensor IDs to ignore\n"
  1487. " default: none\n"
  1488. "\n"
  1489. " ignore-status N1,N2,N3,... sensor IDs to ignore status (nominal/warning/critical)\n"
  1490. " default: none\n"
  1491. "\n"
  1492. " -v\n"
  1493. " -V\n"
  1494. " version print version and exit\n"
  1495. "\n"
  1496. " Linux kernel module for IPMI is CPU hungry.\n"
  1497. " On Linux run this to lower kipmiN CPU utilization:\n"
  1498. " # echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us\n"
  1499. "\n"
  1500. " or create: /etc/modprobe.d/ipmi.conf with these contents:\n"
  1501. " options ipmi_si kipmid_max_busy_us=10\n"
  1502. "\n"
  1503. " For more information:\n"
  1504. " https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin\n"
  1505. "\n"
  1506. , program_name, VERSION
  1507. , update_every
  1508. , netdata_do_sel?"enabled":"disabled"
  1509. , sdr_cache_directory?sdr_cache_directory:"system default"
  1510. , sensor_config_file?sensor_config_file:"system default"
  1511. , sel_config_file?sel_config_file:"system default"
  1512. );
  1513. exit(1);
  1514. }
  1515. else if(i < argc && strcmp("hostname", argv[i]) == 0) {
  1516. hostname = strdupz(argv[++i]);
  1517. char *s = argv[i];
  1518. // mask it be hidden from the process tree
  1519. while(*s) *s++ = 'x';
  1520. if(debug) fprintf(stderr, "%s: hostname set to '%s'\n", program_name, hostname);
  1521. continue;
  1522. }
  1523. else if(i < argc && strcmp("username", argv[i]) == 0) {
  1524. username = strdupz(argv[++i]);
  1525. char *s = argv[i];
  1526. // mask it be hidden from the process tree
  1527. while(*s) *s++ = 'x';
  1528. if(debug) fprintf(stderr, "%s: username set to '%s'\n", program_name, username);
  1529. continue;
  1530. }
  1531. else if(i < argc && strcmp("password", argv[i]) == 0) {
  1532. password = strdupz(argv[++i]);
  1533. char *s = argv[i];
  1534. // mask it be hidden from the process tree
  1535. while(*s) *s++ = 'x';
  1536. if(debug) fprintf(stderr, "%s: password set to '%s'\n", program_name, password);
  1537. continue;
  1538. }
  1539. else if(strcmp("driver-type", argv[i]) == 0) {
  1540. if (hostname) {
  1541. protocol_version = netdata_parse_outofband_driver_type(argv[++i]);
  1542. if(debug) fprintf(stderr, "%s: outband protocol version set to '%d'\n",
  1543. program_name, protocol_version);
  1544. }
  1545. else {
  1546. driver_type = netdata_parse_inband_driver_type(argv[++i]);
  1547. if(debug) fprintf(stderr, "%s: inband driver type set to '%d'\n",
  1548. program_name, driver_type);
  1549. }
  1550. continue;
  1551. } else if (i < argc && (strcmp("noauthcodecheck", argv[i]) == 0 || strcmp("no-auth-code-check", argv[i]) == 0)) {
  1552. if (!hostname || netdata_host_is_localhost(hostname)) {
  1553. if (debug)
  1554. fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for inband configuration\n",
  1555. program_name);
  1556. }
  1557. else if (protocol_version < 0 || protocol_version == IPMI_MONITORING_PROTOCOL_VERSION_1_5) {
  1558. workaround_flags |= IPMI_MONITORING_WORKAROUND_FLAGS_PROTOCOL_VERSION_1_5_NO_AUTH_CODE_CHECK;
  1559. if (debug)
  1560. fprintf(stderr, "%s: noauthcodecheck workaround flag enabled\n", program_name);
  1561. }
  1562. else {
  1563. if (debug)
  1564. fprintf(stderr, "%s: noauthcodecheck workaround flag is ignored for protocol version 2.0\n",
  1565. program_name);
  1566. }
  1567. continue;
  1568. }
  1569. else if(i < argc && strcmp("sdr-cache-dir", argv[i]) == 0) {
  1570. sdr_cache_directory = argv[++i];
  1571. if(debug)
  1572. fprintf(stderr, "%s: SDR cache directory set to '%s'\n", program_name, sdr_cache_directory);
  1573. continue;
  1574. }
  1575. else if(i < argc && strcmp("sensor-config-file", argv[i]) == 0) {
  1576. sensor_config_file = argv[++i];
  1577. if(debug) fprintf(stderr, "%s: sensor config file set to '%s'\n", program_name, sensor_config_file);
  1578. continue;
  1579. }
  1580. else if(i < argc && strcmp("sel-config-file", argv[i]) == 0) {
  1581. sel_config_file = argv[++i];
  1582. if(debug) fprintf(stderr, "%s: sel config file set to '%s'\n", program_name, sel_config_file);
  1583. continue;
  1584. }
  1585. else if(i < argc && strcmp("ignore", argv[i]) == 0) {
  1586. excluded_record_ids_parse(argv[++i], debug);
  1587. continue;
  1588. }
  1589. else if(i < argc && strcmp("ignore-status", argv[i]) == 0) {
  1590. excluded_status_record_ids_parse(argv[++i], debug);
  1591. continue;
  1592. }
  1593. collector_error("%s(): ignoring parameter '%s'", __FUNCTION__, argv[i]);
  1594. }
  1595. errno = 0;
  1596. if(freq_s && freq_s < update_every)
  1597. collector_info("%s(): update frequency %d seconds is too small for IPMI. Using %d.",
  1598. __FUNCTION__, freq_s, update_every);
  1599. update_every = freq_s = MAX(freq_s, update_every);
  1600. update_every_sel = MAX(update_every, update_every_sel);
  1601. // ------------------------------------------------------------------------
  1602. // initialize IPMI
  1603. if(debug) {
  1604. fprintf(stderr, "%s: calling ipmi_monitoring_init()\n", program_name);
  1605. ipmimonitoring_init_flags |= IPMI_MONITORING_FLAGS_DEBUG|IPMI_MONITORING_FLAGS_DEBUG_IPMI_PACKETS;
  1606. }
  1607. int rc;
  1608. if(ipmi_monitoring_init(ipmimonitoring_init_flags, &rc) < 0)
  1609. fatal("ipmi_monitoring_init: %s", ipmi_monitoring_ctx_strerror(rc));
  1610. // ------------------------------------------------------------------------
  1611. // create the data collection threads
  1612. struct ipmi_collection_thread sensors_data = {
  1613. .type = IPMI_COLLECT_TYPE_SENSORS,
  1614. .freq_s = update_every,
  1615. .spinlock = NETDATA_SPINLOCK_INITIALIZER,
  1616. .debug = debug,
  1617. .state = {
  1618. .debug = debug,
  1619. .sensors = {
  1620. .status = ICS_INIT,
  1621. .last_iteration_ut = now_monotonic_usec(),
  1622. .freq_ut = update_every * USEC_PER_SEC,
  1623. .priority = IPMI_SENSORS_DASHBOARD_PRIORITY,
  1624. .dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE|DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct sensor)),
  1625. },
  1626. },
  1627. }, sel_data = {
  1628. .type = IPMI_COLLECT_TYPE_SEL,
  1629. .freq_s = update_every_sel,
  1630. .spinlock = NETDATA_SPINLOCK_INITIALIZER,
  1631. .debug = debug,
  1632. .state = {
  1633. .debug = debug,
  1634. .sel = {
  1635. .status = ICS_INIT,
  1636. .last_iteration_ut = now_monotonic_usec(),
  1637. .freq_ut = update_every_sel * USEC_PER_SEC,
  1638. .priority = IPMI_SEL_DASHBOARD_PRIORITY,
  1639. },
  1640. },
  1641. };
  1642. netdata_thread_t sensors_thread = 0, sel_thread = 0;
  1643. netdata_thread_create(&sensors_thread, "IPMI[sensors]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sensors_data);
  1644. if(netdata_do_sel)
  1645. netdata_thread_create(&sel_thread, "IPMI[sel]", NETDATA_THREAD_OPTION_DONT_LOG, netdata_ipmi_collection_thread, &sel_data);
  1646. // ------------------------------------------------------------------------
  1647. // the main loop
  1648. if(debug) fprintf(stderr, "%s: starting data collection\n", program_name);
  1649. time_t started_t = now_monotonic_sec();
  1650. size_t iteration = 0;
  1651. usec_t step = 100 * USEC_PER_MS;
  1652. bool global_chart_created = false;
  1653. bool tty = isatty(fileno(stdout)) == 1;
  1654. heartbeat_t hb;
  1655. heartbeat_init(&hb);
  1656. for(iteration = 0; 1 ; iteration++) {
  1657. usec_t dt = heartbeat_next(&hb, step);
  1658. if (!tty) {
  1659. netdata_mutex_lock(&stdout_mutex);
  1660. fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs()
  1661. fflush(stdout);
  1662. netdata_mutex_unlock(&stdout_mutex);
  1663. }
  1664. spinlock_lock(&sensors_data.spinlock);
  1665. state.sensors = sensors_data.state.sensors;
  1666. spinlock_unlock(&sensors_data.spinlock);
  1667. spinlock_lock(&sel_data.spinlock);
  1668. state.sel = sel_data.state.sel;
  1669. spinlock_unlock(&sel_data.spinlock);
  1670. switch(state.sensors.status) {
  1671. case ICS_RUNNING:
  1672. step = update_every * USEC_PER_SEC;
  1673. if(state.sensors.last_iteration_ut < now_monotonic_usec() - IPMI_RESTART_IF_SENSORS_DONT_ITERATE_EVERY_SECONDS * USEC_PER_SEC) {
  1674. collector_error("%s(): sensors have not be collected for %zu seconds. Exiting to restart.",
  1675. __FUNCTION__, (size_t)((now_monotonic_usec() - state.sensors.last_iteration_ut) / USEC_PER_SEC));
  1676. fprintf(stdout, "EXIT\n");
  1677. plugin_exit(0);
  1678. }
  1679. break;
  1680. case ICS_INIT:
  1681. continue;
  1682. case ICS_INIT_FAILED:
  1683. collector_error("%s(): sensors failed to initialize. Calling DISABLE.", __FUNCTION__);
  1684. fprintf(stdout, "DISABLE\n");
  1685. plugin_exit(0);
  1686. case ICS_FAILED:
  1687. collector_error("%s(): sensors fails repeatedly to collect metrics. Exiting to restart.", __FUNCTION__);
  1688. fprintf(stdout, "EXIT\n");
  1689. plugin_exit(0);
  1690. }
  1691. if(netdata_do_sel) {
  1692. switch (state.sensors.status) {
  1693. case ICS_RUNNING:
  1694. case ICS_INIT:
  1695. break;
  1696. case ICS_INIT_FAILED:
  1697. case ICS_FAILED:
  1698. collector_error("%s(): SEL fails to collect events. Disabling SEL collection.", __FUNCTION__);
  1699. netdata_do_sel = false;
  1700. break;
  1701. }
  1702. }
  1703. if(unlikely(debug))
  1704. fprintf(stderr, "%s: calling send_ipmi_sensor_metrics_to_netdata()\n", program_name);
  1705. static bool add_func_sensors = true;
  1706. if (add_func_sensors) {
  1707. add_func_sensors = false;
  1708. struct functions_evloop_globals *wg =
  1709. functions_evloop_init(1, "FREEIPMI", &stdout_mutex, &function_plugin_should_exit);
  1710. functions_evloop_add_function(
  1711. wg, "ipmi-sensors", freeimi_function_sensors, PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT, NULL);
  1712. FREEIPMI_GLOBAL_FUNCTION_SENSORS();
  1713. }
  1714. state.updates.now_ut = now_monotonic_usec();
  1715. send_ipmi_sensor_metrics_to_netdata(&state);
  1716. if(netdata_do_sel)
  1717. send_ipmi_sel_metrics_to_netdata(&state);
  1718. if(unlikely(debug))
  1719. fprintf(stderr, "%s: iteration %zu, dt %"PRIu64" usec, sensors ever collected %zu, sensors last collected %zu \n"
  1720. , program_name
  1721. , iteration
  1722. , dt
  1723. , dictionary_entries(state.sensors.dict)
  1724. , state.sensors.collected
  1725. );
  1726. netdata_mutex_lock(&stdout_mutex);
  1727. if (!global_chart_created) {
  1728. global_chart_created = true;
  1729. fprintf(stdout,
  1730. "CHART netdata.freeipmi_availability_status '' 'Plugin availability status' 'status' "
  1731. "plugins netdata.plugin_availability_status line 146000 %d '' '%s' '%s'\n"
  1732. "DIMENSION available '' absolute 1 1\n",
  1733. update_every, program_name, "");
  1734. }
  1735. fprintf(stdout,
  1736. "BEGIN netdata.freeipmi_availability_status\n"
  1737. "SET available = 1\n"
  1738. "END\n");
  1739. // restart check (14400 seconds)
  1740. if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) {
  1741. collector_info("%s(): reached my lifetime expectancy. Exiting to restart.", __FUNCTION__);
  1742. fprintf(stdout, "EXIT\n");
  1743. plugin_exit(0);
  1744. }
  1745. fflush(stdout);
  1746. netdata_mutex_unlock(&stdout_mutex);
  1747. }
  1748. }