smartd_log.chart.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772
  1. # -*- coding: utf-8 -*-
  2. # Description: smart netdata python.d module
  3. # Author: ilyam8, vorph1
  4. # SPDX-License-Identifier: GPL-3.0-or-later
  5. import os
  6. import re
  7. from copy import deepcopy
  8. from time import time
  9. from bases.FrameworkServices.SimpleService import SimpleService
  10. from bases.collection import read_last_line
  11. INCREMENTAL = 'incremental'
  12. ABSOLUTE = 'absolute'
  13. ATA = 'ata'
  14. SCSI = 'scsi'
  15. CSV = '.csv'
  16. DEF_RESCAN_INTERVAL = 60
  17. DEF_AGE = 30
  18. DEF_PATH = '/var/log/smartd'
  19. ATTR1 = '1'
  20. ATTR2 = '2'
  21. ATTR3 = '3'
  22. ATTR4 = '4'
  23. ATTR5 = '5'
  24. ATTR7 = '7'
  25. ATTR8 = '8'
  26. ATTR9 = '9'
  27. ATTR10 = '10'
  28. ATTR11 = '11'
  29. ATTR12 = '12'
  30. ATTR13 = '13'
  31. ATTR170 = '170'
  32. ATTR171 = '171'
  33. ATTR172 = '172'
  34. ATTR173 = '173'
  35. ATTR174 = '174'
  36. ATTR180 = '180'
  37. ATTR183 = '183'
  38. ATTR190 = '190'
  39. ATTR194 = '194'
  40. ATTR196 = '196'
  41. ATTR197 = '197'
  42. ATTR198 = '198'
  43. ATTR199 = '199'
  44. ATTR202 = '202'
  45. ATTR206 = '206'
  46. ATTR233 = '233'
  47. ATTR249 = '249'
  48. ATTR_READ_ERR_COR = 'read-total-err-corrected'
  49. ATTR_READ_ERR_UNC = 'read-total-unc-errors'
  50. ATTR_WRITE_ERR_COR = 'write-total-err-corrected'
  51. ATTR_WRITE_ERR_UNC = 'write-total-unc-errors'
  52. ATTR_VERIFY_ERR_COR = 'verify-total-err-corrected'
  53. ATTR_VERIFY_ERR_UNC = 'verify-total-unc-errors'
  54. ATTR_TEMPERATURE = 'temperature'
  55. RE_ATA = re.compile(
  56. '(\d+);' # attribute
  57. '(\d+);' # normalized value
  58. '(\d+)', # raw value
  59. re.X
  60. )
  61. RE_SCSI = re.compile(
  62. '([a-z-]+);' # attribute
  63. '([0-9.]+)', # raw value
  64. re.X
  65. )
  66. ORDER = [
  67. # errors
  68. 'read_error_rate',
  69. 'seek_error_rate',
  70. 'soft_read_error_rate',
  71. 'write_error_rate',
  72. 'read_total_err_corrected',
  73. 'read_total_unc_errors',
  74. 'write_total_err_corrected',
  75. 'write_total_unc_errors',
  76. 'verify_total_err_corrected',
  77. 'verify_total_unc_errors',
  78. # external failure
  79. 'sata_interface_downshift',
  80. 'udma_crc_error_count',
  81. # performance
  82. 'throughput_performance',
  83. 'seek_time_performance',
  84. # power
  85. 'start_stop_count',
  86. 'power_on_hours_count',
  87. 'power_cycle_count',
  88. 'unexpected_power_loss',
  89. # spin
  90. 'spin_up_time',
  91. 'spin_up_retries',
  92. 'calibration_retries',
  93. # temperature
  94. 'airflow_temperature_celsius',
  95. 'temperature_celsius',
  96. # wear
  97. 'reallocated_sectors_count',
  98. 'reserved_block_count',
  99. 'program_fail_count',
  100. 'erase_fail_count',
  101. 'wear_leveller_worst_case_erase_count',
  102. 'unused_reserved_nand_blocks',
  103. 'reallocation_event_count',
  104. 'current_pending_sector_count',
  105. 'offline_uncorrectable_sector_count',
  106. 'percent_lifetime_used',
  107. 'media_wearout_indicator',
  108. ]
  109. CHARTS = {
  110. 'read_error_rate': {
  111. 'options': [None, 'Read Error Rate', 'value', 'errors', 'smartd_log.read_error_rate', 'line'],
  112. 'lines': [],
  113. 'attrs': [ATTR1],
  114. 'algo': ABSOLUTE,
  115. },
  116. 'seek_error_rate': {
  117. 'options': [None, 'Seek Error Rate', 'value', 'errors', 'smartd_log.seek_error_rate', 'line'],
  118. 'lines': [],
  119. 'attrs': [ATTR7],
  120. 'algo': ABSOLUTE,
  121. },
  122. 'soft_read_error_rate': {
  123. 'options': [None, 'Soft Read Error Rate', 'errors', 'errors', 'smartd_log.soft_read_error_rate', 'line'],
  124. 'lines': [],
  125. 'attrs': [ATTR13],
  126. 'algo': INCREMENTAL,
  127. },
  128. 'write_error_rate': {
  129. 'options': [None, 'Write Error Rate', 'value', 'errors', 'smartd_log.write_error_rate', 'line'],
  130. 'lines': [],
  131. 'attrs': [ATTR206],
  132. 'algo': ABSOLUTE,
  133. },
  134. 'read_total_err_corrected': {
  135. 'options': [None, 'Read Error Corrected', 'errors', 'errors', 'smartd_log.read_total_err_corrected', 'line'],
  136. 'lines': [],
  137. 'attrs': [ATTR_READ_ERR_COR],
  138. 'algo': INCREMENTAL,
  139. },
  140. 'read_total_unc_errors': {
  141. 'options': [None, 'Read Error Uncorrected', 'errors', 'errors', 'smartd_log.read_total_unc_errors', 'line'],
  142. 'lines': [],
  143. 'attrs': [ATTR_READ_ERR_UNC],
  144. 'algo': INCREMENTAL,
  145. },
  146. 'write_total_err_corrected': {
  147. 'options': [None, 'Write Error Corrected', 'errors', 'errors', 'smartd_log.write_total_err_corrected', 'line'],
  148. 'lines': [],
  149. 'attrs': [ATTR_WRITE_ERR_COR],
  150. 'algo': INCREMENTAL,
  151. },
  152. 'write_total_unc_errors': {
  153. 'options': [None, 'Write Error Uncorrected', 'errors', 'errors', 'smartd_log.write_total_unc_errors', 'line'],
  154. 'lines': [],
  155. 'attrs': [ATTR_WRITE_ERR_UNC],
  156. 'algo': INCREMENTAL,
  157. },
  158. 'verify_total_err_corrected': {
  159. 'options': [None, 'Verify Error Corrected', 'errors', 'errors', 'smartd_log.verify_total_err_corrected',
  160. 'line'],
  161. 'lines': [],
  162. 'attrs': [ATTR_VERIFY_ERR_COR],
  163. 'algo': INCREMENTAL,
  164. },
  165. 'verify_total_unc_errors': {
  166. 'options': [None, 'Verify Error Uncorrected', 'errors', 'errors', 'smartd_log.verify_total_unc_errors', 'line'],
  167. 'lines': [],
  168. 'attrs': [ATTR_VERIFY_ERR_UNC],
  169. 'algo': INCREMENTAL,
  170. },
  171. 'sata_interface_downshift': {
  172. 'options': [None, 'SATA Interface Downshift', 'events', 'external failure',
  173. 'smartd_log.sata_interface_downshift', 'line'],
  174. 'lines': [],
  175. 'attrs': [ATTR183],
  176. 'algo': INCREMENTAL,
  177. },
  178. 'udma_crc_error_count': {
  179. 'options': [None, 'UDMA CRC Error Count', 'errors', 'external failure', 'smartd_log.udma_crc_error_count',
  180. 'line'],
  181. 'lines': [],
  182. 'attrs': [ATTR199],
  183. 'algo': INCREMENTAL,
  184. },
  185. 'throughput_performance': {
  186. 'options': [None, 'Throughput Performance', 'value', 'performance', 'smartd_log.throughput_performance',
  187. 'line'],
  188. 'lines': [],
  189. 'attrs': [ATTR2],
  190. 'algo': ABSOLUTE,
  191. },
  192. 'seek_time_performance': {
  193. 'options': [None, 'Seek Time Performance', 'value', 'performance', 'smartd_log.seek_time_performance', 'line'],
  194. 'lines': [],
  195. 'attrs': [ATTR8],
  196. 'algo': ABSOLUTE,
  197. },
  198. 'start_stop_count': {
  199. 'options': [None, 'Start/Stop Count', 'events', 'power', 'smartd_log.start_stop_count', 'line'],
  200. 'lines': [],
  201. 'attrs': [ATTR4],
  202. 'algo': ABSOLUTE,
  203. },
  204. 'power_on_hours_count': {
  205. 'options': [None, 'Power-On Hours Count', 'hours', 'power', 'smartd_log.power_on_hours_count', 'line'],
  206. 'lines': [],
  207. 'attrs': [ATTR9],
  208. 'algo': ABSOLUTE,
  209. },
  210. 'power_cycle_count': {
  211. 'options': [None, 'Power Cycle Count', 'events', 'power', 'smartd_log.power_cycle_count', 'line'],
  212. 'lines': [],
  213. 'attrs': [ATTR12],
  214. 'algo': ABSOLUTE,
  215. },
  216. 'unexpected_power_loss': {
  217. 'options': [None, 'Unexpected Power Loss', 'events', 'power', 'smartd_log.unexpected_power_loss', 'line'],
  218. 'lines': [],
  219. 'attrs': [ATTR174],
  220. 'algo': ABSOLUTE,
  221. },
  222. 'spin_up_time': {
  223. 'options': [None, 'Spin-Up Time', 'ms', 'spin', 'smartd_log.spin_up_time', 'line'],
  224. 'lines': [],
  225. 'attrs': [ATTR3],
  226. 'algo': ABSOLUTE,
  227. },
  228. 'spin_up_retries': {
  229. 'options': [None, 'Spin-up Retries', 'retries', 'spin', 'smartd_log.spin_up_retries', 'line'],
  230. 'lines': [],
  231. 'attrs': [ATTR10],
  232. 'algo': INCREMENTAL,
  233. },
  234. 'calibration_retries': {
  235. 'options': [None, 'Calibration Retries', 'retries', 'spin', 'smartd_log.calibration_retries', 'line'],
  236. 'lines': [],
  237. 'attrs': [ATTR11],
  238. 'algo': INCREMENTAL,
  239. },
  240. 'airflow_temperature_celsius': {
  241. 'options': [None, 'Airflow Temperature Celsius', 'celsius', 'temperature',
  242. 'smartd_log.airflow_temperature_celsius', 'line'],
  243. 'lines': [],
  244. 'attrs': [ATTR190],
  245. 'algo': ABSOLUTE,
  246. },
  247. 'temperature_celsius': {
  248. 'options': [None, 'Temperature', 'celsius', 'temperature', 'smartd_log.temperature_celsius', 'line'],
  249. 'lines': [],
  250. 'attrs': [ATTR194, ATTR_TEMPERATURE],
  251. 'algo': ABSOLUTE,
  252. },
  253. 'reallocated_sectors_count': {
  254. 'options': [None, 'Reallocated Sectors Count', 'sectors', 'wear', 'smartd_log.reallocated_sectors_count',
  255. 'line'],
  256. 'lines': [],
  257. 'attrs': [ATTR5],
  258. 'algo': ABSOLUTE,
  259. },
  260. 'reserved_block_count': {
  261. 'options': [None, 'Reserved Block Count', 'percentage', 'wear', 'smartd_log.reserved_block_count', 'line'],
  262. 'lines': [],
  263. 'attrs': [ATTR170],
  264. 'algo': ABSOLUTE,
  265. },
  266. 'program_fail_count': {
  267. 'options': [None, 'Program Fail Count', 'errors', 'wear', 'smartd_log.program_fail_count', 'line'],
  268. 'lines': [],
  269. 'attrs': [ATTR171],
  270. 'algo': INCREMENTAL,
  271. },
  272. 'erase_fail_count': {
  273. 'options': [None, 'Erase Fail Count', 'failures', 'wear', 'smartd_log.erase_fail_count', 'line'],
  274. 'lines': [],
  275. 'attrs': [ATTR172],
  276. 'algo': INCREMENTAL,
  277. },
  278. 'wear_leveller_worst_case_erase_count': {
  279. 'options': [None, 'Wear Leveller Worst Case Erase Count', 'erases', 'wear',
  280. 'smartd_log.wear_leveller_worst_case_erase_count', 'line'],
  281. 'lines': [],
  282. 'attrs': [ATTR173],
  283. 'algo': ABSOLUTE,
  284. },
  285. 'unused_reserved_nand_blocks': {
  286. 'options': [None, 'Unused Reserved NAND Blocks', 'blocks', 'wear', 'smartd_log.unused_reserved_nand_blocks',
  287. 'line'],
  288. 'lines': [],
  289. 'attrs': [ATTR180],
  290. 'algo': ABSOLUTE,
  291. },
  292. 'reallocation_event_count': {
  293. 'options': [None, 'Reallocation Event Count', 'events', 'wear', 'smartd_log.reallocation_event_count', 'line'],
  294. 'lines': [],
  295. 'attrs': [ATTR196],
  296. 'algo': INCREMENTAL,
  297. },
  298. 'current_pending_sector_count': {
  299. 'options': [None, 'Current Pending Sector Count', 'sectors', 'wear', 'smartd_log.current_pending_sector_count',
  300. 'line'],
  301. 'lines': [],
  302. 'attrs': [ATTR197],
  303. 'algo': ABSOLUTE,
  304. },
  305. 'offline_uncorrectable_sector_count': {
  306. 'options': [None, 'Offline Uncorrectable Sector Count', 'sectors', 'wear',
  307. 'smartd_log.offline_uncorrectable_sector_count', 'line'],
  308. 'lines': [],
  309. 'attrs': [ATTR198],
  310. 'algo': ABSOLUTE,
  311. },
  312. 'percent_lifetime_used': {
  313. 'options': [None, 'Percent Lifetime Used', 'percentage', 'wear', 'smartd_log.percent_lifetime_used', 'line'],
  314. 'lines': [],
  315. 'attrs': [ATTR202],
  316. 'algo': ABSOLUTE,
  317. },
  318. 'media_wearout_indicator': {
  319. 'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'],
  320. 'lines': [],
  321. 'attrs': [ATTR233],
  322. 'algo': ABSOLUTE,
  323. },
  324. 'nand_writes_1gib': {
  325. 'options': [None, 'NAND Writes', 'GiB', 'wear', 'smartd_log.nand_writes_1gib', 'line'],
  326. 'lines': [],
  327. 'attrs': [ATTR249],
  328. 'algo': ABSOLUTE,
  329. },
  330. }
  331. # NOTE: 'parse_temp' decodes ATA 194 raw value. Not heavily tested. Written by @Ferroin
  332. # C code:
  333. # https://github.com/smartmontools/smartmontools/blob/master/smartmontools/atacmds.cpp#L2051
  334. #
  335. # Calling 'parse_temp' on the raw value will return a 4-tuple, containing
  336. # * temperature
  337. # * minimum
  338. # * maximum
  339. # * over-temperature count
  340. # substituting None for values it can't decode.
  341. #
  342. # Example:
  343. # >>> parse_temp(42952491042)
  344. # >>> (34, 10, 43, None)
  345. #
  346. #
  347. # def check_temp_word(i):
  348. # if i <= 0x7F:
  349. # return 0x11
  350. # elif i <= 0xFF:
  351. # return 0x01
  352. # elif 0xFF80 <= i:
  353. # return 0x10
  354. # return 0x00
  355. #
  356. #
  357. # def check_temp_range(t, b0, b1):
  358. # if b0 > b1:
  359. # t0, t1 = b1, b0
  360. # else:
  361. # t0, t1 = b0, b1
  362. #
  363. # if all([
  364. # -60 <= t0,
  365. # t0 <= t,
  366. # t <= t1,
  367. # t1 <= 120,
  368. # not (t0 == -1 and t1 <= 0)
  369. # ]):
  370. # return t0, t1
  371. # return None, None
  372. #
  373. #
  374. # def parse_temp(raw):
  375. # byte = list()
  376. # word = list()
  377. # for i in range(0, 6):
  378. # byte.append(0xFF & (raw >> (i * 8)))
  379. # for i in range(0, 3):
  380. # word.append(0xFFFF & (raw >> (i * 16)))
  381. #
  382. # ctwd = check_temp_word(word[0])
  383. #
  384. # if not word[2]:
  385. # if ctwd and not word[1]:
  386. # # byte[0] is temp, no other data
  387. # return byte[0], None, None, None
  388. #
  389. # if ctwd and all(check_temp_range(byte[0], byte[2], byte[3])):
  390. # # byte[0] is temp, byte[2] is max or min, byte[3] is min or max
  391. # trange = check_temp_range(byte[0], byte[2], byte[3])
  392. # return byte[0], trange[0], trange[1], None
  393. #
  394. # if ctwd and all(check_temp_range(byte[0], byte[1], byte[2])):
  395. # # byte[0] is temp, byte[1] is max or min, byte[2] is min or max
  396. # trange = check_temp_range(byte[0], byte[1], byte[2])
  397. # return byte[0], trange[0], trange[1], None
  398. #
  399. # return None, None, None, None
  400. #
  401. # if ctwd:
  402. # if all(
  403. # [
  404. # ctwd & check_temp_word(word[1]) & check_temp_word(word[2]) != 0x00,
  405. # all(check_temp_range(byte[0], byte[2], byte[4])),
  406. # ]
  407. # ):
  408. # # byte[0] is temp, byte[2] is max or min, byte[4] is min or max
  409. # trange = check_temp_range(byte[0], byte[2], byte[4])
  410. # return byte[0], trange[0], trange[1], None
  411. # else:
  412. # trange = check_temp_range(byte[0], byte[2], byte[3])
  413. # if word[2] < 0x7FFF and all(trange) and trange[1] >= 40:
  414. # # byte[0] is temp, byte[2] is max or min, byte[3] is min or max, word[2] is overtemp count
  415. # return byte[0], trange[0], trange[1], word[2]
  416. # # no data
  417. # return None, None, None, None
  418. CHARTED_ATTRS = dict((attr, k) for k, v in CHARTS.items() for attr in v['attrs'])
  419. class BaseAtaSmartAttribute:
  420. def __init__(self, name, normalized_value, raw_value):
  421. self.name = name
  422. self.normalized_value = normalized_value
  423. self.raw_value = raw_value
  424. def value(self):
  425. raise NotImplementedError
  426. class AtaRaw(BaseAtaSmartAttribute):
  427. def value(self):
  428. return self.raw_value
  429. class AtaNormalized(BaseAtaSmartAttribute):
  430. def value(self):
  431. return self.normalized_value
  432. class Ata3(BaseAtaSmartAttribute):
  433. def value(self):
  434. value = int(self.raw_value)
  435. # https://github.com/netdata/netdata/issues/5919
  436. #
  437. # 3;151;38684000679;
  438. # 423 (Average 447)
  439. # 38684000679 & 0xFFF -> 423
  440. # (38684000679 & 0xFFF0000) >> 16 -> 447
  441. if value > 1e6:
  442. return value & 0xFFF
  443. return value
  444. class Ata9(BaseAtaSmartAttribute):
  445. def value(self):
  446. value = int(self.raw_value)
  447. if value > 1e6:
  448. return value & 0xFFFF
  449. return value
  450. class Ata190(BaseAtaSmartAttribute):
  451. def value(self):
  452. return 100 - int(self.normalized_value)
  453. class Ata194(BaseAtaSmartAttribute):
  454. # https://github.com/netdata/netdata/issues/3041
  455. # https://github.com/netdata/netdata/issues/5919
  456. #
  457. # The low byte is the current temperature, the third lowest is the maximum, and the fifth lowest is the minimum
  458. def value(self):
  459. value = int(self.raw_value)
  460. if value > 1e6:
  461. return value & 0xFF
  462. return min(int(self.normalized_value), int(self.raw_value))
  463. class BaseSCSISmartAttribute:
  464. def __init__(self, name, raw_value):
  465. self.name = name
  466. self.raw_value = raw_value
  467. def value(self):
  468. raise NotImplementedError
  469. class SCSIRaw(BaseSCSISmartAttribute):
  470. def value(self):
  471. return self.raw_value
  472. def ata_attribute_factory(value):
  473. name = value[0]
  474. if name == ATTR3:
  475. return Ata3(*value)
  476. elif name == ATTR9:
  477. return Ata9(*value)
  478. elif name == ATTR190:
  479. return Ata190(*value)
  480. elif name == ATTR194:
  481. return Ata194(*value)
  482. elif name in [
  483. ATTR1,
  484. ATTR7,
  485. ATTR202,
  486. ATTR206,
  487. ATTR233,
  488. ]:
  489. return AtaNormalized(*value)
  490. return AtaRaw(*value)
  491. def scsi_attribute_factory(value):
  492. return SCSIRaw(*value)
  493. def attribute_factory(value):
  494. name = value[0]
  495. if name.isdigit():
  496. return ata_attribute_factory(value)
  497. return scsi_attribute_factory(value)
  498. def handle_error(*errors):
  499. def on_method(method):
  500. def on_call(*args):
  501. try:
  502. return method(*args)
  503. except errors:
  504. return None
  505. return on_call
  506. return on_method
  507. class DiskLogFile:
  508. def __init__(self, full_path):
  509. self.path = full_path
  510. self.size = os.path.getsize(full_path)
  511. @handle_error(OSError)
  512. def is_changed(self):
  513. return self.size != os.path.getsize(self.path)
  514. @handle_error(OSError)
  515. def is_active(self, current_time, limit):
  516. return (current_time - os.path.getmtime(self.path)) / 60 < limit
  517. @handle_error(OSError)
  518. def read(self):
  519. self.size = os.path.getsize(self.path)
  520. return read_last_line(self.path)
  521. class BaseDisk:
  522. def __init__(self, name, log_file):
  523. self.raw_name = name
  524. self.name = re.sub(r'_+', '_', name)
  525. self.log_file = log_file
  526. self.attrs = list()
  527. self.alive = True
  528. self.charted = False
  529. def __eq__(self, other):
  530. if isinstance(other, BaseDisk):
  531. return self.raw_name == other.raw_name
  532. return self.raw_name == other
  533. def __ne__(self, other):
  534. return not self == other
  535. def __hash__(self):
  536. return hash(repr(self))
  537. def parser(self, data):
  538. raise NotImplementedError
  539. @handle_error(TypeError)
  540. def populate_attrs(self):
  541. self.attrs = list()
  542. line = self.log_file.read()
  543. for value in self.parser(line):
  544. self.attrs.append(attribute_factory(value))
  545. return len(self.attrs)
  546. def data(self):
  547. data = dict()
  548. for attr in self.attrs:
  549. data['{0}_{1}'.format(self.name, attr.name)] = attr.value()
  550. return data
  551. class ATADisk(BaseDisk):
  552. def parser(self, data):
  553. return RE_ATA.findall(data)
  554. class SCSIDisk(BaseDisk):
  555. def parser(self, data):
  556. return RE_SCSI.findall(data)
  557. class Service(SimpleService):
  558. def __init__(self, configuration=None, name=None):
  559. SimpleService.__init__(self, configuration=configuration, name=name)
  560. self.order = ORDER
  561. self.definitions = deepcopy(CHARTS)
  562. self.log_path = configuration.get('log_path', DEF_PATH)
  563. self.age = configuration.get('age', DEF_AGE)
  564. self.exclude = configuration.get('exclude_disks', str()).split()
  565. self.disks = list()
  566. self.runs = 0
  567. self.do_force_rescan = False
  568. def check(self):
  569. return self.scan() > 0
  570. def get_data(self):
  571. self.runs += 1
  572. if self.do_force_rescan or self.runs % DEF_RESCAN_INTERVAL == 0:
  573. self.cleanup()
  574. self.scan()
  575. self.do_force_rescan = False
  576. data = dict()
  577. for disk in self.disks:
  578. if not disk.alive:
  579. continue
  580. if not disk.charted:
  581. self.add_disk_to_charts(disk)
  582. changed = disk.log_file.is_changed()
  583. if changed is None:
  584. disk.alive = False
  585. self.do_force_rescan = True
  586. continue
  587. if changed and disk.populate_attrs() is None:
  588. disk.alive = False
  589. self.do_force_rescan = True
  590. continue
  591. data.update(disk.data())
  592. return data
  593. def cleanup(self):
  594. current_time = time()
  595. for disk in self.disks[:]:
  596. if any(
  597. [
  598. not disk.alive,
  599. not disk.log_file.is_active(current_time, self.age),
  600. ]
  601. ):
  602. self.disks.remove(disk.raw_name)
  603. self.remove_disk_from_charts(disk)
  604. def scan(self):
  605. self.debug('scanning {0}'.format(self.log_path))
  606. current_time = time()
  607. for full_name in os.listdir(self.log_path):
  608. disk = self.create_disk_from_file(full_name, current_time)
  609. if not disk:
  610. continue
  611. self.disks.append(disk)
  612. return len(self.disks)
  613. def create_disk_from_file(self, full_name, current_time):
  614. if not full_name.endswith(CSV):
  615. self.debug('skipping {0}: not a csv file'.format(full_name))
  616. return None
  617. name = os.path.basename(full_name).split('.')[-3]
  618. path = os.path.join(self.log_path, full_name)
  619. if name in self.disks:
  620. self.debug('skipping {0}: already in disks'.format(full_name))
  621. return None
  622. if [p for p in self.exclude if p in name]:
  623. self.debug('skipping {0}: filtered by `exclude` option'.format(full_name))
  624. return None
  625. if not os.access(path, os.R_OK):
  626. self.debug('skipping {0}: not readable'.format(full_name))
  627. return None
  628. if os.path.getsize(path) == 0:
  629. self.debug('skipping {0}: zero size'.format(full_name))
  630. return None
  631. if (current_time - os.path.getmtime(path)) / 60 > self.age:
  632. self.debug('skipping {0}: haven\'t been updated for last {1} minutes'.format(full_name, self.age))
  633. return None
  634. if ATA in full_name:
  635. disk = ATADisk(name, DiskLogFile(path))
  636. elif SCSI in full_name:
  637. disk = SCSIDisk(name, DiskLogFile(path))
  638. else:
  639. self.debug('skipping {0}: unknown type'.format(full_name))
  640. return None
  641. disk.populate_attrs()
  642. if not disk.attrs:
  643. self.error('skipping {0}: parsing failed'.format(full_name))
  644. return None
  645. self.debug('added {0}'.format(full_name))
  646. return disk
  647. def add_disk_to_charts(self, disk):
  648. if len(self.charts) == 0 or disk.charted:
  649. return
  650. disk.charted = True
  651. for attr in disk.attrs:
  652. chart_id = CHARTED_ATTRS.get(attr.name)
  653. if not chart_id or chart_id not in self.charts:
  654. continue
  655. chart = self.charts[chart_id]
  656. dim = [
  657. '{0}_{1}'.format(disk.name, attr.name),
  658. disk.name,
  659. CHARTS[chart_id]['algo'],
  660. ]
  661. if dim[0] in self.charts[chart_id].dimensions:
  662. chart.hide_dimension(dim[0], reverse=True)
  663. else:
  664. chart.add_dimension(dim)
  665. def remove_disk_from_charts(self, disk):
  666. if len(self.charts) == 0 or not disk.charted:
  667. return
  668. for attr in disk.attrs:
  669. chart_id = CHARTED_ATTRS.get(attr.name)
  670. if not chart_id or chart_id not in self.charts:
  671. continue
  672. self.charts[chart_id].del_dimension('{0}_{1}'.format(disk.name, attr.name))