# smartd_log.chart.py
  1. # -*- coding: utf-8 -*-
  2. # Description: smart netdata python.d module
  3. # Author: ilyam8, vorph1
  4. # SPDX-License-Identifier: GPL-3.0-or-later
  5. import os
  6. import re
  7. from copy import deepcopy
  8. from time import time
  9. from bases.FrameworkServices.SimpleService import SimpleService
  10. from bases.collection import read_last_line
  11. INCREMENTAL = 'incremental'
  12. ABSOLUTE = 'absolute'
  13. ATA = 'ata'
  14. SCSI = 'scsi'
  15. CSV = '.csv'
  16. DEF_RESCAN_INTERVAL = 60
  17. DEF_AGE = 30
  18. DEF_PATH = '/var/log/smartd'
  19. ATTR1 = '1'
  20. ATTR2 = '2'
  21. ATTR3 = '3'
  22. ATTR4 = '4'
  23. ATTR5 = '5'
  24. ATTR7 = '7'
  25. ATTR8 = '8'
  26. ATTR9 = '9'
  27. ATTR10 = '10'
  28. ATTR11 = '11'
  29. ATTR12 = '12'
  30. ATTR13 = '13'
  31. ATTR170 = '170'
  32. ATTR171 = '171'
  33. ATTR172 = '172'
  34. ATTR173 = '173'
  35. ATTR174 = '174'
  36. ATTR177 = '177'
  37. ATTR180 = '180'
  38. ATTR183 = '183'
  39. ATTR190 = '190'
  40. ATTR194 = '194'
  41. ATTR196 = '196'
  42. ATTR197 = '197'
  43. ATTR198 = '198'
  44. ATTR199 = '199'
  45. ATTR202 = '202'
  46. ATTR206 = '206'
  47. ATTR233 = '233'
  48. ATTR241 = '241'
  49. ATTR242 = '242'
  50. ATTR249 = '249'
  51. ATTR_READ_ERR_COR = 'read-total-err-corrected'
  52. ATTR_READ_ERR_UNC = 'read-total-unc-errors'
  53. ATTR_WRITE_ERR_COR = 'write-total-err-corrected'
  54. ATTR_WRITE_ERR_UNC = 'write-total-unc-errors'
  55. ATTR_VERIFY_ERR_COR = 'verify-total-err-corrected'
  56. ATTR_VERIFY_ERR_UNC = 'verify-total-unc-errors'
  57. ATTR_TEMPERATURE = 'temperature'
  58. RE_ATA = re.compile(
  59. '(\d+);' # attribute
  60. '(\d+);' # normalized value
  61. '(\d+)', # raw value
  62. re.X
  63. )
  64. RE_SCSI = re.compile(
  65. '([a-z-]+);' # attribute
  66. '([0-9.]+)', # raw value
  67. re.X
  68. )
  69. ORDER = [
  70. # errors
  71. 'read_error_rate',
  72. 'seek_error_rate',
  73. 'soft_read_error_rate',
  74. 'write_error_rate',
  75. 'read_total_err_corrected',
  76. 'read_total_unc_errors',
  77. 'write_total_err_corrected',
  78. 'write_total_unc_errors',
  79. 'verify_total_err_corrected',
  80. 'verify_total_unc_errors',
  81. # external failure
  82. 'sata_interface_downshift',
  83. 'udma_crc_error_count',
  84. # performance
  85. 'throughput_performance',
  86. 'seek_time_performance',
  87. # power
  88. 'start_stop_count',
  89. 'power_on_hours_count',
  90. 'power_cycle_count',
  91. 'unexpected_power_loss',
  92. # spin
  93. 'spin_up_time',
  94. 'spin_up_retries',
  95. 'calibration_retries',
  96. # temperature
  97. 'airflow_temperature_celsius',
  98. 'temperature_celsius',
  99. # wear
  100. 'reallocated_sectors_count',
  101. 'reserved_block_count',
  102. 'program_fail_count',
  103. 'erase_fail_count',
  104. 'wear_leveller_worst_case_erase_count',
  105. 'unused_reserved_nand_blocks',
  106. 'reallocation_event_count',
  107. 'current_pending_sector_count',
  108. 'offline_uncorrectable_sector_count',
  109. 'percent_lifetime_used',
  110. 'media_wearout_indicator',
  111. 'total_lbas_written',
  112. 'total_lbas_read',
  113. ]
  114. CHARTS = {
  115. 'read_error_rate': {
  116. 'options': [None, 'Read Error Rate', 'value', 'errors', 'smartd_log.read_error_rate', 'line'],
  117. 'lines': [],
  118. 'attrs': [ATTR1],
  119. 'algo': ABSOLUTE,
  120. },
  121. 'seek_error_rate': {
  122. 'options': [None, 'Seek Error Rate', 'value', 'errors', 'smartd_log.seek_error_rate', 'line'],
  123. 'lines': [],
  124. 'attrs': [ATTR7],
  125. 'algo': ABSOLUTE,
  126. },
  127. 'soft_read_error_rate': {
  128. 'options': [None, 'Soft Read Error Rate', 'errors', 'errors', 'smartd_log.soft_read_error_rate', 'line'],
  129. 'lines': [],
  130. 'attrs': [ATTR13],
  131. 'algo': INCREMENTAL,
  132. },
  133. 'write_error_rate': {
  134. 'options': [None, 'Write Error Rate', 'value', 'errors', 'smartd_log.write_error_rate', 'line'],
  135. 'lines': [],
  136. 'attrs': [ATTR206],
  137. 'algo': ABSOLUTE,
  138. },
  139. 'read_total_err_corrected': {
  140. 'options': [None, 'Read Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'],
  141. 'lines': [],
  142. 'attrs': [ATTR_READ_ERR_COR],
  143. 'algo': INCREMENTAL,
  144. },
  145. 'read_total_unc_errors': {
  146. 'options': [None, 'Read Error Uncorrected', 'errors', 'errors', 'smartd_log.read_total_unc_errors', 'line'],
  147. 'lines': [],
  148. 'attrs': [ATTR_READ_ERR_UNC],
  149. 'algo': INCREMENTAL,
  150. },
  151. 'write_total_err_corrected': {
  152. 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'],
  153. 'lines': [],
  154. 'attrs': [ATTR_WRITE_ERR_COR],
  155. 'algo': INCREMENTAL,
  156. },
  157. 'write_total_unc_errors': {
  158. 'options': [None, 'Write Error Uncorrected', 'errors', 'errors', 'smartd_log.write_total_unc_errors', 'line'],
  159. 'lines': [],
  160. 'attrs': [ATTR_WRITE_ERR_UNC],
  161. 'algo': INCREMENTAL,
  162. },
  163. 'verify_total_err_corrected': {
  164. 'options': [None, 'Verify Error Corrected', 'errors', 'errors', 'smartd_log.verify_total_err_corrected',
  165. 'line'],
  166. 'lines': [],
  167. 'attrs': [ATTR_VERIFY_ERR_COR],
  168. 'algo': INCREMENTAL,
  169. },
  170. 'verify_total_unc_errors': {
  171. 'options': [None, 'Verify Error Uncorrected', 'errors', 'errors', 'smartd_log.verify_total_unc_errors', 'line'],
  172. 'lines': [],
  173. 'attrs': [ATTR_VERIFY_ERR_UNC],
  174. 'algo': INCREMENTAL,
  175. },
  176. 'sata_interface_downshift': {
  177. 'options': [None, 'SATA Interface Downshift', 'events', 'external failure',
  178. 'smartd_log.sata_interface_downshift', 'line'],
  179. 'lines': [],
  180. 'attrs': [ATTR183],
  181. 'algo': INCREMENTAL,
  182. },
  183. 'udma_crc_error_count': {
  184. 'options': [None, 'UDMA CRC Error Count', 'errors', 'external failure', 'smartd_log.udma_crc_error_count',
  185. 'line'],
  186. 'lines': [],
  187. 'attrs': [ATTR199],
  188. 'algo': INCREMENTAL,
  189. },
  190. 'throughput_performance': {
  191. 'options': [None, 'Throughput Performance', 'value', 'performance', 'smartd_log.throughput_performance',
  192. 'line'],
  193. 'lines': [],
  194. 'attrs': [ATTR2],
  195. 'algo': ABSOLUTE,
  196. },
  197. 'seek_time_performance': {
  198. 'options': [None, 'Seek Time Performance', 'value', 'performance', 'smartd_log.seek_time_performance', 'line'],
  199. 'lines': [],
  200. 'attrs': [ATTR8],
  201. 'algo': ABSOLUTE,
  202. },
  203. 'start_stop_count': {
  204. 'options': [None, 'Start/Stop Count', 'events', 'power', 'smartd_log.start_stop_count', 'line'],
  205. 'lines': [],
  206. 'attrs': [ATTR4],
  207. 'algo': ABSOLUTE,
  208. },
  209. 'power_on_hours_count': {
  210. 'options': [None, 'Power-On Hours Count', 'hours', 'power', 'smartd_log.power_on_hours_count', 'line'],
  211. 'lines': [],
  212. 'attrs': [ATTR9],
  213. 'algo': ABSOLUTE,
  214. },
  215. 'power_cycle_count': {
  216. 'options': [None, 'Power Cycle Count', 'events', 'power', 'smartd_log.power_cycle_count', 'line'],
  217. 'lines': [],
  218. 'attrs': [ATTR12],
  219. 'algo': ABSOLUTE,
  220. },
  221. 'unexpected_power_loss': {
  222. 'options': [None, 'Unexpected Power Loss', 'events', 'power', 'smartd_log.unexpected_power_loss', 'line'],
  223. 'lines': [],
  224. 'attrs': [ATTR174],
  225. 'algo': ABSOLUTE,
  226. },
  227. 'spin_up_time': {
  228. 'options': [None, 'Spin-Up Time', 'ms', 'spin', 'smartd_log.spin_up_time', 'line'],
  229. 'lines': [],
  230. 'attrs': [ATTR3],
  231. 'algo': ABSOLUTE,
  232. },
  233. 'spin_up_retries': {
  234. 'options': [None, 'Spin-up Retries', 'retries', 'spin', 'smartd_log.spin_up_retries', 'line'],
  235. 'lines': [],
  236. 'attrs': [ATTR10],
  237. 'algo': INCREMENTAL,
  238. },
  239. 'calibration_retries': {
  240. 'options': [None, 'Calibration Retries', 'retries', 'spin', 'smartd_log.calibration_retries', 'line'],
  241. 'lines': [],
  242. 'attrs': [ATTR11],
  243. 'algo': INCREMENTAL,
  244. },
  245. 'airflow_temperature_celsius': {
  246. 'options': [None, 'Airflow Temperature Celsius', 'celsius', 'temperature',
  247. 'smartd_log.airflow_temperature_celsius', 'line'],
  248. 'lines': [],
  249. 'attrs': [ATTR190],
  250. 'algo': ABSOLUTE,
  251. },
  252. 'temperature_celsius': {
  253. 'options': [None, 'Temperature', 'celsius', 'temperature', 'smartd_log.temperature_celsius', 'line'],
  254. 'lines': [],
  255. 'attrs': [ATTR194, ATTR_TEMPERATURE],
  256. 'algo': ABSOLUTE,
  257. },
  258. 'reallocated_sectors_count': {
  259. 'options': [None, 'Reallocated Sectors Count', 'sectors', 'wear', 'smartd_log.reallocated_sectors_count',
  260. 'line'],
  261. 'lines': [],
  262. 'attrs': [ATTR5],
  263. 'algo': ABSOLUTE,
  264. },
  265. 'reserved_block_count': {
  266. 'options': [None, 'Reserved Block Count', 'percentage', 'wear', 'smartd_log.reserved_block_count', 'line'],
  267. 'lines': [],
  268. 'attrs': [ATTR170],
  269. 'algo': ABSOLUTE,
  270. },
  271. 'program_fail_count': {
  272. 'options': [None, 'Program Fail Count', 'errors', 'wear', 'smartd_log.program_fail_count', 'line'],
  273. 'lines': [],
  274. 'attrs': [ATTR171],
  275. 'algo': INCREMENTAL,
  276. },
  277. 'erase_fail_count': {
  278. 'options': [None, 'Erase Fail Count', 'failures', 'wear', 'smartd_log.erase_fail_count', 'line'],
  279. 'lines': [],
  280. 'attrs': [ATTR172],
  281. 'algo': INCREMENTAL,
  282. },
  283. 'wear_leveller_worst_case_erase_count': {
  284. 'options': [None, 'Wear Leveller Worst Case Erase Count', 'erases', 'wear',
  285. 'smartd_log.wear_leveller_worst_case_erase_count', 'line'],
  286. 'lines': [],
  287. 'attrs': [ATTR173],
  288. 'algo': ABSOLUTE,
  289. },
  290. 'unused_reserved_nand_blocks': {
  291. 'options': [None, 'Unused Reserved NAND Blocks', 'blocks', 'wear', 'smartd_log.unused_reserved_nand_blocks',
  292. 'line'],
  293. 'lines': [],
  294. 'attrs': [ATTR180],
  295. 'algo': ABSOLUTE,
  296. },
  297. 'reallocation_event_count': {
  298. 'options': [None, 'Reallocation Event Count', 'events', 'wear', 'smartd_log.reallocation_event_count', 'line'],
  299. 'lines': [],
  300. 'attrs': [ATTR196],
  301. 'algo': INCREMENTAL,
  302. },
  303. 'current_pending_sector_count': {
  304. 'options': [None, 'Current Pending Sector Count', 'sectors', 'wear', 'smartd_log.current_pending_sector_count',
  305. 'line'],
  306. 'lines': [],
  307. 'attrs': [ATTR197],
  308. 'algo': ABSOLUTE,
  309. },
  310. 'offline_uncorrectable_sector_count': {
  311. 'options': [None, 'Offline Uncorrectable Sector Count', 'sectors', 'wear',
  312. 'smartd_log.offline_uncorrectable_sector_count', 'line'],
  313. 'lines': [],
  314. 'attrs': [ATTR198],
  315. 'algo': ABSOLUTE,
  316. },
  317. 'percent_lifetime_used': {
  318. 'options': [None, 'Percent Lifetime Used', 'percentage', 'wear', 'smartd_log.percent_lifetime_used', 'line'],
  319. 'lines': [],
  320. 'attrs': [ATTR202],
  321. 'algo': ABSOLUTE,
  322. },
  323. 'media_wearout_indicator': {
  324. 'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'],
  325. 'lines': [],
  326. 'attrs': [ATTR233, ATTR177],
  327. 'algo': ABSOLUTE,
  328. },
  329. 'nand_writes_1gib': {
  330. 'options': [None, 'NAND Writes', 'GiB', 'wear', 'smartd_log.nand_writes_1gib', 'line'],
  331. 'lines': [],
  332. 'attrs': [ATTR249],
  333. 'algo': ABSOLUTE,
  334. },
  335. 'total_lbas_written': {
  336. 'options': [None, 'Total LBAs Written', 'sectors', 'wear', 'smartd_log.total_lbas_written', 'line'],
  337. 'lines': [],
  338. 'attrs': [ATTR241],
  339. 'algo': ABSOLUTE,
  340. },
  341. 'total_lbas_read': {
  342. 'options': [None, 'Total LBAs Read', 'sectors', 'wear', 'smartd_log.total_lbas_read', 'line'],
  343. 'lines': [],
  344. 'attrs': [ATTR242],
  345. 'algo': ABSOLUTE,
  346. },
  347. }
  348. # NOTE: 'parse_temp' decodes ATA 194 raw value. Not heavily tested. Written by @Ferroin
  349. # C code:
  350. # https://github.com/smartmontools/smartmontools/blob/master/smartmontools/atacmds.cpp#L2051
  351. #
  352. # Calling 'parse_temp' on the raw value will return a 4-tuple, containing
  353. # * temperature
  354. # * minimum
  355. # * maximum
  356. # * over-temperature count
  357. # substituting None for values it can't decode.
  358. #
  359. # Example:
# >>> parse_temp(42952491042)
# (34, 10, 43, None)
  362. #
  363. #
  364. # def check_temp_word(i):
  365. # if i <= 0x7F:
  366. # return 0x11
  367. # elif i <= 0xFF:
  368. # return 0x01
  369. # elif 0xFF80 <= i:
  370. # return 0x10
  371. # return 0x00
  372. #
  373. #
  374. # def check_temp_range(t, b0, b1):
  375. # if b0 > b1:
  376. # t0, t1 = b1, b0
  377. # else:
  378. # t0, t1 = b0, b1
  379. #
  380. # if all([
  381. # -60 <= t0,
  382. # t0 <= t,
  383. # t <= t1,
  384. # t1 <= 120,
  385. # not (t0 == -1 and t1 <= 0)
  386. # ]):
  387. # return t0, t1
  388. # return None, None
  389. #
  390. #
  391. # def parse_temp(raw):
  392. # byte = list()
  393. # word = list()
  394. # for i in range(0, 6):
  395. # byte.append(0xFF & (raw >> (i * 8)))
  396. # for i in range(0, 3):
  397. # word.append(0xFFFF & (raw >> (i * 16)))
  398. #
  399. # ctwd = check_temp_word(word[0])
  400. #
  401. # if not word[2]:
  402. # if ctwd and not word[1]:
  403. # # byte[0] is temp, no other data
  404. # return byte[0], None, None, None
  405. #
  406. # if ctwd and all(check_temp_range(byte[0], byte[2], byte[3])):
  407. # # byte[0] is temp, byte[2] is max or min, byte[3] is min or max
  408. # trange = check_temp_range(byte[0], byte[2], byte[3])
  409. # return byte[0], trange[0], trange[1], None
  410. #
  411. # if ctwd and all(check_temp_range(byte[0], byte[1], byte[2])):
  412. # # byte[0] is temp, byte[1] is max or min, byte[2] is min or max
  413. # trange = check_temp_range(byte[0], byte[1], byte[2])
  414. # return byte[0], trange[0], trange[1], None
  415. #
  416. # return None, None, None, None
  417. #
  418. # if ctwd:
  419. # if all(
  420. # [
  421. # ctwd & check_temp_word(word[1]) & check_temp_word(word[2]) != 0x00,
  422. # all(check_temp_range(byte[0], byte[2], byte[4])),
  423. # ]
  424. # ):
  425. # # byte[0] is temp, byte[2] is max or min, byte[4] is min or max
  426. # trange = check_temp_range(byte[0], byte[2], byte[4])
  427. # return byte[0], trange[0], trange[1], None
  428. # else:
  429. # trange = check_temp_range(byte[0], byte[2], byte[3])
  430. # if word[2] < 0x7FFF and all(trange) and trange[1] >= 40:
  431. # # byte[0] is temp, byte[2] is max or min, byte[3] is min or max, word[2] is overtemp count
  432. # return byte[0], trange[0], trange[1], word[2]
  433. # # no data
  434. # return None, None, None, None
  435. CHARTED_ATTRS = dict((attr, k) for k, v in CHARTS.items() for attr in v['attrs'])
  436. class BaseAtaSmartAttribute:
  437. def __init__(self, name, normalized_value, raw_value):
  438. self.name = name
  439. self.normalized_value = normalized_value
  440. self.raw_value = raw_value
  441. def value(self):
  442. raise NotImplementedError
  443. class AtaRaw(BaseAtaSmartAttribute):
  444. def value(self):
  445. return self.raw_value
  446. class AtaNormalized(BaseAtaSmartAttribute):
  447. def value(self):
  448. return self.normalized_value
  449. class Ata3(BaseAtaSmartAttribute):
  450. def value(self):
  451. value = int(self.raw_value)
  452. # https://github.com/netdata/netdata/issues/5919
  453. #
  454. # 3;151;38684000679;
  455. # 423 (Average 447)
  456. # 38684000679 & 0xFFF -> 423
  457. # (38684000679 & 0xFFF0000) >> 16 -> 447
  458. if value > 1e6:
  459. return value & 0xFFF
  460. return value
  461. class Ata9(BaseAtaSmartAttribute):
  462. def value(self):
  463. value = int(self.raw_value)
  464. if value > 1e6:
  465. return value & 0xFFFF
  466. return value
  467. class Ata190(BaseAtaSmartAttribute):
  468. def value(self):
  469. return 100 - int(self.normalized_value)
  470. class Ata194(BaseAtaSmartAttribute):
  471. # https://github.com/netdata/netdata/issues/3041
  472. # https://github.com/netdata/netdata/issues/5919
  473. #
  474. # The low byte is the current temperature, the third lowest is the maximum, and the fifth lowest is the minimum
  475. def value(self):
  476. value = int(self.raw_value)
  477. if value > 1e6:
  478. return value & 0xFF
  479. return min(int(self.normalized_value), int(self.raw_value))
  480. class BaseSCSISmartAttribute:
  481. def __init__(self, name, raw_value):
  482. self.name = name
  483. self.raw_value = raw_value
  484. def value(self):
  485. raise NotImplementedError
  486. class SCSIRaw(BaseSCSISmartAttribute):
  487. def value(self):
  488. return self.raw_value
  489. def ata_attribute_factory(value):
  490. name = value[0]
  491. if name == ATTR3:
  492. return Ata3(*value)
  493. elif name == ATTR9:
  494. return Ata9(*value)
  495. elif name == ATTR190:
  496. return Ata190(*value)
  497. elif name == ATTR194:
  498. return Ata194(*value)
  499. elif name in [
  500. ATTR1,
  501. ATTR7,
  502. ATTR177,
  503. ATTR202,
  504. ATTR206,
  505. ATTR233,
  506. ]:
  507. return AtaNormalized(*value)
  508. return AtaRaw(*value)
  509. def scsi_attribute_factory(value):
  510. return SCSIRaw(*value)
  511. def attribute_factory(value):
  512. name = value[0]
  513. if name.isdigit():
  514. return ata_attribute_factory(value)
  515. return scsi_attribute_factory(value)
  516. def handle_error(*errors):
  517. def on_method(method):
  518. def on_call(*args):
  519. try:
  520. return method(*args)
  521. except errors:
  522. return None
  523. return on_call
  524. return on_method
  525. class DiskLogFile:
  526. def __init__(self, full_path):
  527. self.path = full_path
  528. self.size = os.path.getsize(full_path)
  529. @handle_error(OSError)
  530. def is_changed(self):
  531. return self.size != os.path.getsize(self.path)
  532. @handle_error(OSError)
  533. def is_active(self, current_time, limit):
  534. return (current_time - os.path.getmtime(self.path)) / 60 < limit
  535. @handle_error(OSError)
  536. def read(self):
  537. self.size = os.path.getsize(self.path)
  538. return read_last_line(self.path)
  539. class BaseDisk:
  540. def __init__(self, name, log_file):
  541. self.raw_name = name
  542. self.name = re.sub(r'_+', '_', name)
  543. self.log_file = log_file
  544. self.attrs = list()
  545. self.alive = True
  546. self.charted = False
  547. def __eq__(self, other):
  548. if isinstance(other, BaseDisk):
  549. return self.raw_name == other.raw_name
  550. return self.raw_name == other
  551. def __ne__(self, other):
  552. return not self == other
  553. def __hash__(self):
  554. return hash(repr(self))
  555. def parser(self, data):
  556. raise NotImplementedError
  557. @handle_error(TypeError)
  558. def populate_attrs(self):
  559. self.attrs = list()
  560. line = self.log_file.read()
  561. for value in self.parser(line):
  562. self.attrs.append(attribute_factory(value))
  563. return len(self.attrs)
  564. def data(self):
  565. data = dict()
  566. for attr in self.attrs:
  567. data['{0}_{1}'.format(self.name, attr.name)] = attr.value()
  568. return data
  569. class ATADisk(BaseDisk):
  570. def parser(self, data):
  571. return RE_ATA.findall(data)
  572. class SCSIDisk(BaseDisk):
  573. def parser(self, data):
  574. return RE_SCSI.findall(data)
  575. class Service(SimpleService):
  576. def __init__(self, configuration=None, name=None):
  577. SimpleService.__init__(self, configuration=configuration, name=name)
  578. self.order = ORDER
  579. self.definitions = deepcopy(CHARTS)
  580. self.log_path = configuration.get('log_path', DEF_PATH)
  581. self.age = configuration.get('age', DEF_AGE)
  582. self.exclude = configuration.get('exclude_disks', str()).split()
  583. self.disks = list()
  584. self.runs = 0
  585. self.do_force_rescan = False
  586. def check(self):
  587. return self.scan() > 0
  588. def get_data(self):
  589. self.runs += 1
  590. if self.do_force_rescan or self.runs % DEF_RESCAN_INTERVAL == 0:
  591. self.cleanup()
  592. self.scan()
  593. self.do_force_rescan = False
  594. data = dict()
  595. for disk in self.disks:
  596. if not disk.alive:
  597. continue
  598. if not disk.charted:
  599. self.add_disk_to_charts(disk)
  600. changed = disk.log_file.is_changed()
  601. if changed is None:
  602. disk.alive = False
  603. self.do_force_rescan = True
  604. continue
  605. if changed and disk.populate_attrs() is None:
  606. disk.alive = False
  607. self.do_force_rescan = True
  608. continue
  609. data.update(disk.data())
  610. return data
  611. def cleanup(self):
  612. current_time = time()
  613. for disk in self.disks[:]:
  614. if any(
  615. [
  616. not disk.alive,
  617. not disk.log_file.is_active(current_time, self.age),
  618. ]
  619. ):
  620. self.disks.remove(disk.raw_name)
  621. self.remove_disk_from_charts(disk)
  622. def scan(self):
  623. self.debug('scanning {0}'.format(self.log_path))
  624. current_time = time()
  625. for full_name in os.listdir(self.log_path):
  626. disk = self.create_disk_from_file(full_name, current_time)
  627. if not disk:
  628. continue
  629. self.disks.append(disk)
  630. return len(self.disks)
  631. def create_disk_from_file(self, full_name, current_time):
  632. if not full_name.endswith(CSV):
  633. self.debug('skipping {0}: not a csv file'.format(full_name))
  634. return None
  635. name = os.path.basename(full_name).split('.')[-3]
  636. path = os.path.join(self.log_path, full_name)
  637. if name in self.disks:
  638. self.debug('skipping {0}: already in disks'.format(full_name))
  639. return None
  640. if [p for p in self.exclude if p in name]:
  641. self.debug('skipping {0}: filtered by `exclude` option'.format(full_name))
  642. return None
  643. if not os.access(path, os.R_OK):
  644. self.debug('skipping {0}: not readable'.format(full_name))
  645. return None
  646. if os.path.getsize(path) == 0:
  647. self.debug('skipping {0}: zero size'.format(full_name))
  648. return None
  649. if (current_time - os.path.getmtime(path)) / 60 > self.age:
  650. self.debug('skipping {0}: haven\'t been updated for last {1} minutes'.format(full_name, self.age))
  651. return None
  652. if ATA in full_name:
  653. disk = ATADisk(name, DiskLogFile(path))
  654. elif SCSI in full_name:
  655. disk = SCSIDisk(name, DiskLogFile(path))
  656. else:
  657. self.debug('skipping {0}: unknown type'.format(full_name))
  658. return None
  659. disk.populate_attrs()
  660. if not disk.attrs:
  661. self.error('skipping {0}: parsing failed'.format(full_name))
  662. return None
  663. self.debug('added {0}'.format(full_name))
  664. return disk
  665. def add_disk_to_charts(self, disk):
  666. if len(self.charts) == 0 or disk.charted:
  667. return
  668. disk.charted = True
  669. for attr in disk.attrs:
  670. chart_id = CHARTED_ATTRS.get(attr.name)
  671. if not chart_id or chart_id not in self.charts:
  672. continue
  673. chart = self.charts[chart_id]
  674. dim = [
  675. '{0}_{1}'.format(disk.name, attr.name),
  676. disk.name,
  677. CHARTS[chart_id]['algo'],
  678. ]
  679. if dim[0] in self.charts[chart_id].dimensions:
  680. chart.hide_dimension(dim[0], reverse=True)
  681. else:
  682. chart.add_dimension(dim)
  683. def remove_disk_from_charts(self, disk):
  684. if len(self.charts) == 0 or not disk.charted:
  685. return
  686. for attr in disk.attrs:
  687. chart_id = CHARTED_ATTRS.get(attr.name)
  688. if not chart_id or chart_id not in self.charts:
  689. continue
  690. self.charts[chart_id].del_dimension('{0}_{1}'.format(disk.name, attr.name))