# -*- coding: utf-8 -*-
# Description: mongodb netdata python.d module
# Author: ilyam8
# SPDX-License-Identifier: GPL-3.0-or-later

from copy import deepcopy
from datetime import datetime
from sys import exc_info

# pymongo is an optional dependency: if it is missing the module still imports,
# and Service.check() reports the problem instead of crashing the plugin.
try:
    from pymongo import MongoClient, ASCENDING, DESCENDING
    from pymongo.errors import PyMongoError
    PYMONGO = True
except ImportError:
    PYMONGO = False

from bases.FrameworkServices.SimpleService import SimpleService
# Replica set member states: (state number as string, human readable name).
# The state number comes from replSetGetStatus members[i]['state']; the list
# order here is the dimension order of each per-host replication-state chart,
# so it is deliberate and must not be re-sorted.
REPL_SET_STATES = [
    ('1', 'primary'),
    ('8', 'down'),
    ('2', 'secondary'),
    ('3', 'recovering'),
    ('5', 'startup2'),
    ('4', 'fatal'),
    ('7', 'arbiter'),
    ('6', 'unknown'),
    ('9', 'rollback'),
    ('10', 'removed'),
    ('0', 'startup')
]
  28. def multiply_by_100(value):
  29. return value * 100
# Metrics collected for every server, regardless of storage engine / version.
# Each entry is (dotted path inside the serverStatus document,
#                optional dimension rename (None -> use the last path segment),
#                optional transform function applied to the raw value).
# NOTE: both 'metrics.cursor.*' (MongoDB 3.x) and 'cursors.*' (MongoDB 2.x)
# variants are listed and mapped onto the same dimension ids; only the path
# that exists in the server's output produces a value.
DEFAULT_METRICS = [
    ('opcounters.delete', None, None),
    ('opcounters.update', None, None),
    ('opcounters.insert', None, None),
    ('opcounters.query', None, None),
    ('opcounters.getmore', None, None),
    ('globalLock.activeClients.readers', 'activeClients_readers', None),
    ('globalLock.activeClients.writers', 'activeClients_writers', None),
    ('connections.available', 'connections_available', None),
    ('connections.current', 'connections_current', None),
    ('mem.mapped', None, None),
    ('mem.resident', None, None),
    ('mem.virtual', None, None),
    ('globalLock.currentQueue.readers', 'currentQueue_readers', None),
    ('globalLock.currentQueue.writers', 'currentQueue_writers', None),
    ('asserts.msg', None, None),
    ('asserts.regular', None, None),
    ('asserts.user', None, None),
    ('asserts.warning', None, None),
    ('extra_info.page_faults', None, None),
    ('metrics.record.moves', None, None),
    # flush timings are multiplied by 100 and divided back in the chart (2 decimals)
    ('backgroundFlushing.average_ms', None, multiply_by_100),
    ('backgroundFlushing.last_ms', None, multiply_by_100),
    ('backgroundFlushing.flushes', None, multiply_by_100),
    ('metrics.cursor.timedOut', None, None),
    ('metrics.cursor.open.total', 'cursor_total', None),
    ('metrics.cursor.open.noTimeout', None, None),
    ('cursors.timedOut', None, None),
    ('cursors.totalOpen', 'cursor_total', None)
]
# Journaling (durability) metrics; added only when 'dur' is present
# in serverStatus (see build_metrics_to_collect_).
DUR = [
    ('dur.commits', None, None),
    ('dur.journaledMB', None, multiply_by_100)
]
# WiredTiger storage-engine metrics; added only when 'wiredTiger' is present
# in serverStatus. Note the cache keys literally contain spaces — that is how
# wiredTiger reports them.
WIREDTIGER = [
    ('wiredTiger.concurrentTransactions.read.available', 'wiredTigerRead_available', None),
    ('wiredTiger.concurrentTransactions.read.out', 'wiredTigerRead_out', None),
    ('wiredTiger.concurrentTransactions.write.available', 'wiredTigerWrite_available', None),
    ('wiredTiger.concurrentTransactions.write.out', 'wiredTigerWrite_out', None),
    ('wiredTiger.cache.bytes currently in the cache', None, None),
    ('wiredTiger.cache.tracked dirty bytes in the cache', None, None),
    ('wiredTiger.cache.maximum bytes configured', None, None),
    ('wiredTiger.cache.unmodified pages evicted', 'unmodified', None),
    ('wiredTiger.cache.modified pages evicted', 'modified', None)
]
# tcmalloc allocator metrics; added only when 'tcmalloc' is present
# in serverStatus.
TCMALLOC = [
    ('tcmalloc.generic.current_allocated_bytes', None, None),
    ('tcmalloc.generic.heap_size', None, None),
    ('tcmalloc.tcmalloc.central_cache_free_bytes', None, None),
    ('tcmalloc.tcmalloc.current_total_thread_cache_bytes', None, None),
    ('tcmalloc.tcmalloc.pageheap_free_bytes', None, None),
    ('tcmalloc.tcmalloc.pageheap_unmapped_bytes', None, None),
    ('tcmalloc.tcmalloc.thread_cache_free_bytes', None, None),
    ('tcmalloc.tcmalloc.transfer_cache_free_bytes', None, None)
]
  85. COMMANDS = [
  86. ('metrics.commands.count.total', 'count_total', None),
  87. ('metrics.commands.createIndexes.total', 'createIndexes_total', None),
  88. ('metrics.commands.delete.total', 'delete_total', None),
  89. ('metrics.commands.eval.total', 'eval_total', None),
  90. ('metrics.commands.findAndModify.total', 'findAndModify_total', None),
  91. ('metrics.commands.insert.total', 'insert_total', None),
  92. ('metrics.commands.delete.total', 'delete_total', None),
  93. ('metrics.commands.count.failed', 'count_failed', None),
  94. ('metrics.commands.createIndexes.failed', 'createIndexes_failed', None),
  95. ('metrics.commands.delete.failed', 'delete_failed', None),
  96. ('metrics.commands.eval.failed', 'eval_failed', None),
  97. ('metrics.commands.findAndModify.failed', 'findAndModify_failed', None),
  98. ('metrics.commands.insert.failed', 'insert_failed', None),
  99. ('metrics.commands.delete.failed', 'delete_failed', None)
  100. ]
# Lock acquisition counters; added only when 'Collection' is present in
# serverStatus['locks']. Mode letters follow MongoDB's convention:
#   R = shared, W = exclusive, r = intent shared, w = intent exclusive.
# Metadata and oplog expose only a subset of the modes.
LOCKS = [
    ('locks.Collection.acquireCount.R', 'Collection_R', None),
    ('locks.Collection.acquireCount.r', 'Collection_r', None),
    ('locks.Collection.acquireCount.W', 'Collection_W', None),
    ('locks.Collection.acquireCount.w', 'Collection_w', None),
    ('locks.Database.acquireCount.R', 'Database_R', None),
    ('locks.Database.acquireCount.r', 'Database_r', None),
    ('locks.Database.acquireCount.W', 'Database_W', None),
    ('locks.Database.acquireCount.w', 'Database_w', None),
    ('locks.Global.acquireCount.R', 'Global_R', None),
    ('locks.Global.acquireCount.r', 'Global_r', None),
    ('locks.Global.acquireCount.W', 'Global_W', None),
    ('locks.Global.acquireCount.w', 'Global_w', None),
    ('locks.Metadata.acquireCount.R', 'Metadata_R', None),
    ('locks.Metadata.acquireCount.w', 'Metadata_w', None),
    ('locks.oplog.acquireCount.r', 'oplog_r', None),
    ('locks.oplog.acquireCount.w', 'oplog_w', None)
]
# Keys read from each database's dbStats command output; collected per
# database as '<dbname>_<key>' dimensions (see _get_data).
DBSTATS = [
    'dataSize',
    'indexSize',
    'storageSize',
    'objects'
]
# charts order (can be overridden if you want less charts, or different order)
# Entries not supported by the connected server are removed in
# Service.create_charts_(); per-database and replication charts are appended
# there dynamically.
ORDER = [
    'read_operations',
    'write_operations',
    'active_clients',
    'journaling_transactions',
    'journaling_volume',
    'background_flush_average',
    'background_flush_last',
    'background_flush_rate',
    'wiredtiger_read',
    'wiredtiger_write',
    'cursors',
    'connections',
    'memory',
    'page_faults',
    'queued_requests',
    'record_moves',
    'wiredtiger_cache',
    'wiredtiger_pages_evicted',
    'asserts',
    'locks_collection',
    'locks_database',
    'locks_global',
    'locks_metadata',
    'locks_oplog',
    'dbstats_objects',
    'tcmalloc_generic',
    'tcmalloc_metrics',
    'command_total_rate',
    'command_failed_rate'
]
# Chart definitions keyed by the chart ids listed in ORDER.
# Line format: [dimension_id, display_name, algorithm(, multiplier, divisor)].
# Dimensions whose source metric was stored pre-multiplied (e.g. via
# multiply_by_100) carry a matching divisor here.
CHARTS = {
    'read_operations': {
        'options': [None, 'Received read requests', 'requests/s', 'throughput metrics',
                    'mongodb.read_operations', 'line'],
        'lines': [
            ['query', None, 'incremental'],
            ['getmore', None, 'incremental']
        ]
    },
    'write_operations': {
        'options': [None, 'Received write requests', 'requests/s', 'throughput metrics',
                    'mongodb.write_operations', 'line'],
        'lines': [
            ['insert', None, 'incremental'],
            ['update', None, 'incremental'],
            ['delete', None, 'incremental']
        ]
    },
    'active_clients': {
        'options': [None, 'Clients with read or write operations in progress or queued', 'clients',
                    'throughput metrics', 'mongodb.active_clients', 'line'],
        'lines': [
            ['activeClients_readers', 'readers', 'absolute'],
            ['activeClients_writers', 'writers', 'absolute']
        ]
    },
    'journaling_transactions': {
        'options': [None, 'Transactions that have been written to the journal', 'commits',
                    'database performance', 'mongodb.journaling_transactions', 'line'],
        'lines': [
            ['commits', None, 'absolute']
        ]
    },
    'journaling_volume': {
        'options': [None, 'Volume of data written to the journal', 'MiB', 'database performance',
                    'mongodb.journaling_volume', 'line'],
        'lines': [
            # divisor 100 undoes the multiply_by_100 applied at collection time
            ['journaledMB', 'volume', 'absolute', 1, 100]
        ]
    },
    'background_flush_average': {
        'options': [None, 'Average time taken by flushes to execute', 'milliseconds', 'database performance',
                    'mongodb.background_flush_average', 'line'],
        'lines': [
            ['average_ms', 'time', 'absolute', 1, 100]
        ]
    },
    'background_flush_last': {
        'options': [None, 'Time taken by the last flush operation to execute', 'milliseconds', 'database performance',
                    'mongodb.background_flush_last', 'line'],
        'lines': [
            ['last_ms', 'time', 'absolute', 1, 100]
        ]
    },
    'background_flush_rate': {
        'options': [None, 'Flushes rate', 'flushes', 'database performance', 'mongodb.background_flush_rate', 'line'],
        'lines': [
            ['flushes', 'flushes', 'incremental', 1, 1]
        ]
    },
    'wiredtiger_read': {
        'options': [None, 'Read tickets in use and remaining', 'tickets', 'database performance',
                    'mongodb.wiredtiger_read', 'stacked'],
        'lines': [
            ['wiredTigerRead_available', 'available', 'absolute', 1, 1],
            ['wiredTigerRead_out', 'inuse', 'absolute', 1, 1]
        ]
    },
    'wiredtiger_write': {
        'options': [None, 'Write tickets in use and remaining', 'tickets', 'database performance',
                    'mongodb.wiredtiger_write', 'stacked'],
        'lines': [
            ['wiredTigerWrite_available', 'available', 'absolute', 1, 1],
            ['wiredTigerWrite_out', 'inuse', 'absolute', 1, 1]
        ]
    },
    'cursors': {
        'options': [None, 'Currently openned cursors, cursors with timeout disabled and timed out cursors',
                    'cursors', 'database performance', 'mongodb.cursors', 'stacked'],
        'lines': [
            ['cursor_total', 'openned', 'absolute', 1, 1],
            ['noTimeout', None, 'absolute', 1, 1],
            ['timedOut', None, 'incremental', 1, 1]
        ]
    },
    'connections': {
        'options': [None, 'Currently connected clients and unused connections', 'connections',
                    'resource utilization', 'mongodb.connections', 'stacked'],
        'lines': [
            ['connections_available', 'unused', 'absolute', 1, 1],
            ['connections_current', 'connected', 'absolute', 1, 1]
        ]
    },
    'memory': {
        'options': [None, 'Memory metrics', 'MiB', 'resource utilization', 'mongodb.memory', 'stacked'],
        'lines': [
            ['virtual', None, 'absolute', 1, 1],
            ['resident', None, 'absolute', 1, 1],
            # 'nonmapped' is derived in _get_data (virtual - mappedWithJournal)
            ['nonmapped', None, 'absolute', 1, 1],
            ['mapped', None, 'absolute', 1, 1]
        ]
    },
    'page_faults': {
        'options': [None, 'Number of times MongoDB had to fetch data from disk', 'request/s',
                    'resource utilization', 'mongodb.page_faults', 'line'],
        'lines': [
            ['page_faults', None, 'incremental', 1, 1]
        ]
    },
    'queued_requests': {
        'options': [None, 'Currently queued read and write requests', 'requests', 'resource saturation',
                    'mongodb.queued_requests', 'line'],
        'lines': [
            ['currentQueue_readers', 'readers', 'absolute', 1, 1],
            ['currentQueue_writers', 'writers', 'absolute', 1, 1]
        ]
    },
    'record_moves': {
        'options': [None, 'Number of times documents had to be moved on-disk', 'number',
                    'resource saturation', 'mongodb.record_moves', 'line'],
        'lines': [
            ['moves', None, 'incremental', 1, 1]
        ]
    },
    'asserts': {
        'options': [
            None,
            'Number of message, warning, regular, corresponding to errors generated by users assertions raised',
            'number', 'errors (asserts)', 'mongodb.asserts', 'line'],
        'lines': [
            ['msg', None, 'incremental', 1, 1],
            ['warning', None, 'incremental', 1, 1],
            ['regular', None, 'incremental', 1, 1],
            ['user', None, 'incremental', 1, 1]
        ]
    },
    'wiredtiger_cache': {
        'options': [None, 'The percentage of the wiredTiger cache that is in use and cache with dirty bytes',
                    'percentage', 'resource utilization', 'mongodb.wiredtiger_cache', 'stacked'],
        'lines': [
            # values are pre-scaled by 1000 in _get_data, hence the divisor
            ['wiredTiger_percent_clean', 'inuse', 'absolute', 1, 1000],
            ['wiredTiger_percent_dirty', 'dirty', 'absolute', 1, 1000]
        ]
    },
    'wiredtiger_pages_evicted': {
        'options': [None, 'Pages evicted from the cache',
                    'pages', 'resource utilization', 'mongodb.wiredtiger_pages_evicted', 'stacked'],
        'lines': [
            ['unmodified', None, 'absolute', 1, 1],
            ['modified', None, 'absolute', 1, 1]
        ]
    },
    'dbstats_objects': {
        'options': [None, 'Number of documents in the database among all the collections', 'documents',
                    'storage size metrics', 'mongodb.dbstats_objects', 'stacked'],
        # one line per database, appended dynamically in create_charts_()
        'lines': []
    },
    'tcmalloc_generic': {
        'options': [None, 'Tcmalloc generic metrics', 'MiB', 'tcmalloc', 'mongodb.tcmalloc_generic', 'stacked'],
        'lines': [
            # divisor 1 << 20 converts bytes to MiB
            ['current_allocated_bytes', 'allocated', 'absolute', 1, 1 << 20],
            ['heap_size', 'heap_size', 'absolute', 1, 1 << 20]
        ]
    },
    'tcmalloc_metrics': {
        'options': [None, 'Tcmalloc metrics', 'KiB', 'tcmalloc', 'mongodb.tcmalloc_metrics', 'stacked'],
        'lines': [
            ['central_cache_free_bytes', 'central_cache_free', 'absolute', 1, 1024],
            ['current_total_thread_cache_bytes', 'current_total_thread_cache', 'absolute', 1, 1024],
            ['pageheap_free_bytes', 'pageheap_free', 'absolute', 1, 1024],
            ['pageheap_unmapped_bytes', 'pageheap_unmapped', 'absolute', 1, 1024],
            ['thread_cache_free_bytes', 'thread_cache_free', 'absolute', 1, 1024],
            ['transfer_cache_free_bytes', 'transfer_cache_free', 'absolute', 1, 1024]
        ]
    },
    'command_total_rate': {
        'options': [None, 'Commands total rate', 'commands/s', 'commands', 'mongodb.command_total_rate', 'stacked'],
        'lines': [
            ['count_total', 'count', 'incremental', 1, 1],
            ['createIndexes_total', 'createIndexes', 'incremental', 1, 1],
            ['delete_total', 'delete', 'incremental', 1, 1],
            ['eval_total', 'eval', 'incremental', 1, 1],
            ['findAndModify_total', 'findAndModify', 'incremental', 1, 1],
            ['insert_total', 'insert', 'incremental', 1, 1],
            ['update_total', 'update', 'incremental', 1, 1]
        ]
    },
    'command_failed_rate': {
        'options': [None, 'Commands failed rate', 'commands/s', 'commands', 'mongodb.command_failed_rate', 'stacked'],
        'lines': [
            ['count_failed', 'count', 'incremental', 1, 1],
            ['createIndexes_failed', 'createIndexes', 'incremental', 1, 1],
            ['delete_failed', 'delete', 'incremental', 1, 1],
            ['eval_failed', 'eval', 'incremental', 1, 1],
            ['findAndModify_failed', 'findAndModify', 'incremental', 1, 1],
            ['insert_failed', 'insert', 'incremental', 1, 1],
            ['update_failed', 'update', 'incremental', 1, 1]
        ]
    },
    'locks_collection': {
        'options': [None, 'Collection lock. Number of times the lock was acquired in the specified mode',
                    'locks', 'locks metrics', 'mongodb.locks_collection', 'stacked'],
        'lines': [
            ['Collection_R', 'shared', 'incremental'],
            ['Collection_W', 'exclusive', 'incremental'],
            ['Collection_r', 'intent_shared', 'incremental'],
            ['Collection_w', 'intent_exclusive', 'incremental']
        ]
    },
    'locks_database': {
        'options': [None, 'Database lock. Number of times the lock was acquired in the specified mode',
                    'locks', 'locks metrics', 'mongodb.locks_database', 'stacked'],
        'lines': [
            ['Database_R', 'shared', 'incremental'],
            ['Database_W', 'exclusive', 'incremental'],
            ['Database_r', 'intent_shared', 'incremental'],
            ['Database_w', 'intent_exclusive', 'incremental']
        ]
    },
    'locks_global': {
        'options': [None, 'Global lock. Number of times the lock was acquired in the specified mode',
                    'locks', 'locks metrics', 'mongodb.locks_global', 'stacked'],
        'lines': [
            ['Global_R', 'shared', 'incremental'],
            ['Global_W', 'exclusive', 'incremental'],
            ['Global_r', 'intent_shared', 'incremental'],
            ['Global_w', 'intent_exclusive', 'incremental']
        ]
    },
    'locks_metadata': {
        'options': [None, 'Metadata lock. Number of times the lock was acquired in the specified mode',
                    'locks', 'locks metrics', 'mongodb.locks_metadata', 'stacked'],
        'lines': [
            ['Metadata_R', 'shared', 'incremental'],
            ['Metadata_w', 'intent_exclusive', 'incremental']
        ]
    },
    'locks_oplog': {
        'options': [None, 'Lock on the oplog. Number of times the lock was acquired in the specified mode',
                    'locks', 'locks metrics', 'mongodb.locks_oplog', 'stacked'],
        'lines': [
            ['oplog_r', 'intent_shared', 'incremental'],
            ['oplog_w', 'intent_exclusive', 'incremental']
        ]
    }
}
class Service(SimpleService):
    """Netdata service collecting MongoDB metrics via pymongo.

    Connects to a single mongod/mongos instance, inspects the serverStatus
    document once during check() to discover which feature sets the server
    exposes (journaling, wiredTiger, tcmalloc, command metrics, lock stats,
    replication), and builds the metric list and chart definitions to match.
    """

    def __init__(self, configuration=None, name=None):
        SimpleService.__init__(self, configuration=configuration, name=name)
        self.order = ORDER[:]
        self.definitions = deepcopy(CHARTS)
        # connection settings from the job configuration (with defaults)
        self.authdb = self.configuration.get('authdb', 'admin')
        self.user = self.configuration.get('user')
        self.password = self.configuration.get('pass')
        self.host = self.configuration.get('host', '127.0.0.1')
        self.port = self.configuration.get('port', 27017)
        self.timeout = self.configuration.get('timeout', 100)
        # base metric set; extended per-server in build_metrics_to_collect_()
        self.metrics_to_collect = deepcopy(DEFAULT_METRICS)
        self.connection = None
        self.do_replica = None  # becomes True/False once serverStatus is inspected
        self.databases = list()

    def check(self):
        """Validate the configuration: connect, probe the server, fetch once.

        Returns True when a connection is established and _get_data() yields
        a non-empty dict; False (with an error logged) otherwise.
        """
        if not PYMONGO:
            self.error('Pymongo package v2.4+ is needed to use mongodb.chart.py')
            return False
        self.connection, server_status, error = self._create_connection()
        if error:
            self.error(error)
            return False

        self.build_metrics_to_collect_(server_status)

        try:
            data = self._get_data()
        except (LookupError, SyntaxError, AttributeError):
            self.error('Type: %s, error: %s' % (str(exc_info()[0]), str(exc_info()[1])))
            return False
        if isinstance(data, dict) and data:
            self._data_from_check = data
            self.create_charts_(server_status)
            return True
        self.error('_get_data() returned no data or type is not <dict>')
        return False

    def build_metrics_to_collect_(self, server_status):
        """Extend the metric list with feature sets the server reports.

        NOTE(review): serverStatus['metrics'] and serverStatus['locks'] are
        accessed unconditionally — presumably always present on supported
        MongoDB versions; a missing key would raise KeyError here.
        """
        self.do_replica = 'repl' in server_status
        if 'dur' in server_status:
            self.metrics_to_collect.extend(DUR)
        if 'tcmalloc' in server_status:
            self.metrics_to_collect.extend(TCMALLOC)
        if 'commands' in server_status['metrics']:
            self.metrics_to_collect.extend(COMMANDS)
        if 'wiredTiger' in server_status:
            self.metrics_to_collect.extend(WIREDTIGER)
        if 'Collection' in server_status['locks']:
            self.metrics_to_collect.extend(LOCKS)

    def create_charts_(self, server_status):
        """Prune unsupported charts from ORDER and add dynamic ones.

        Dynamic additions: one dbstats chart per database, plus oplog /
        heartbeat / optimedate / member-state charts for replica sets.
        """
        # drop charts whose source metrics the server does not expose
        if 'dur' not in server_status:
            self.order.remove('journaling_transactions')
            self.order.remove('journaling_volume')

        if 'backgroundFlushing' not in server_status:
            self.order.remove('background_flush_average')
            self.order.remove('background_flush_last')
            self.order.remove('background_flush_rate')

        if 'wiredTiger' not in server_status:
            self.order.remove('wiredtiger_write')
            self.order.remove('wiredtiger_read')
            self.order.remove('wiredtiger_cache')

        if 'tcmalloc' not in server_status:
            self.order.remove('tcmalloc_generic')
            self.order.remove('tcmalloc_metrics')

        if 'commands' not in server_status['metrics']:
            self.order.remove('command_total_rate')
            self.order.remove('command_failed_rate')

        if 'Collection' not in server_status['locks']:
            self.order.remove('locks_collection')
            self.order.remove('locks_database')
            self.order.remove('locks_global')
            self.order.remove('locks_metadata')

        if 'oplog' not in server_status['locks']:
            self.order.remove('locks_oplog')

        # one storage-size chart per database, plus a line per database in
        # the shared 'dbstats_objects' chart
        for dbase in self.databases:
            self.order.append('_'.join([dbase, 'dbstats']))
            self.definitions['_'.join([dbase, 'dbstats'])] = {
                'options': [None, '%s: size of all documents, indexes, extents' % dbase, 'KB',
                            'storage size metrics', 'mongodb.dbstats', 'line'],
                'lines': [
                    ['_'.join([dbase, 'dataSize']), 'documents', 'absolute', 1, 1024],
                    ['_'.join([dbase, 'indexSize']), 'indexes', 'absolute', 1, 1024],
                    ['_'.join([dbase, 'storageSize']), 'extents', 'absolute', 1, 1024]
                ]}
            self.definitions['dbstats_objects']['lines'].append(['_'.join([dbase, 'objects']), dbase, 'absolute'])

        if self.do_replica:
            def create_lines(hosts, string):
                # one dimension per host: '<host>_<string>'
                lines = list()
                for host in hosts:
                    dim_id = '_'.join([host, string])
                    lines.append([dim_id, host, 'absolute', 1, 1000])
                return lines

            def create_state_lines(states):
                # NOTE(review): 'host' here is a closure over the loop
                # variable of the 'for host in all_hosts' loop below; this
                # helper is only called from inside that loop.
                lines = list()
                for state, description in states:
                    dim_id = '_'.join([host, 'state', state])
                    lines.append([dim_id, description, 'absolute', 1, 1])
                return lines

            all_hosts = server_status['repl']['hosts'] + server_status['repl'].get('arbiters', list())
            this_host = server_status['repl']['me']
            other_hosts = [host for host in all_hosts if host != this_host]

            if 'local' in self.databases:
                self.order.append('oplog_window')
                self.definitions['oplog_window'] = {
                    'options': [None, 'Interval of time between the oldest and the latest entries in the oplog',
                                'seconds', 'replication and oplog', 'mongodb.oplog_window', 'line'],
                    'lines': [['timeDiff', 'window', 'absolute', 1, 1000]]}
            # Create "heartbeat delay" chart
            self.order.append('heartbeat_delay')
            self.definitions['heartbeat_delay'] = {
                'options': [
                    None,
                    'Time when last heartbeat was received from the replica set member (lastHeartbeatRecv)',
                    'seconds ago', 'replication and oplog', 'mongodb.replication_heartbeat_delay', 'stacked'],
                'lines': create_lines(other_hosts, 'heartbeat_lag')}
            # Create "optimedate delay" chart
            self.order.append('optimedate_delay')
            self.definitions['optimedate_delay'] = {
                'options': [None, 'Time when last entry from the oplog was applied (optimeDate)',
                            'seconds ago', 'replication and oplog', 'mongodb.replication_optimedate_delay', 'stacked'],
                'lines': create_lines(all_hosts, 'optimedate')}
            # Create "replica set members state" chart
            for host in all_hosts:
                chart_name = '_'.join([host, 'state'])
                self.order.append(chart_name)
                self.definitions[chart_name] = {
                    'options': [None, 'Replica set member (%s) current state' % host, 'state',
                                'replication and oplog', 'mongodb.replication_state', 'line'],
                    'lines': create_state_lines(REPL_SET_STATES)}

    def _get_raw_data(self):
        """Gather all raw server documents into one dict, or None if empty."""
        raw_data = dict()

        # each getter is best-effort: failures contribute an empty dict
        raw_data.update(self.get_server_status() or dict())
        raw_data.update(self.get_db_stats() or dict())
        raw_data.update(self.get_repl_set_get_status() or dict())
        raw_data.update(self.get_get_replication_info() or dict())

        return raw_data or None

    def get_server_status(self):
        """Return {'serverStatus': ...} or None on pymongo error."""
        raw_data = dict()
        try:
            raw_data['serverStatus'] = self.connection.admin.command('serverStatus')
        except PyMongoError:
            return None
        else:
            return raw_data

    def get_db_stats(self):
        """Return {'dbStats': {dbname: stats, ...}} or None."""
        if not self.databases:
            return None
        raw_data = dict()
        raw_data['dbStats'] = dict()
        try:
            for dbase in self.databases:
                raw_data['dbStats'][dbase] = self.connection[dbase].command('dbStats')
            return raw_data
        except PyMongoError:
            return None

    def get_repl_set_get_status(self):
        """Return {'replSetGetStatus': ...} or None (also None if not a replica)."""
        if not self.do_replica:
            return None
        raw_data = dict()
        try:
            raw_data['replSetGetStatus'] = self.connection.admin.command('replSetGetStatus')
            return raw_data
        except PyMongoError:
            return None

    def get_get_replication_info(self):
        """Return the oldest and newest oplog entries, or None.

        Only meaningful on replica set members that expose the 'local' db.
        """
        if not (self.do_replica and 'local' in self.databases):
            return None
        raw_data = dict()
        raw_data['getReplicationInfo'] = dict()
        try:
            raw_data['getReplicationInfo']['ASCENDING'] = self.connection.local.oplog.rs.find().sort(
                '$natural', ASCENDING).limit(1)[0]
            raw_data['getReplicationInfo']['DESCENDING'] = self.connection.local.oplog.rs.find().sort(
                '$natural', DESCENDING).limit(1)[0]
            return raw_data
        except PyMongoError:
            return None

    def _get_data(self):
        """
        :return: dict
        """
        raw_data = self._get_raw_data()

        if not raw_data:
            return None

        to_netdata = dict()
        serverStatus = raw_data['serverStatus']
        dbStats = raw_data.get('dbStats')
        replSetGetStatus = raw_data.get('replSetGetStatus')
        getReplicationInfo = raw_data.get('getReplicationInfo')
        utc_now = datetime.utcnow()

        # serverStatus: walk each dotted metric path into the nested document
        for metric, new_name, func in self.metrics_to_collect:
            value = serverStatus
            for key in metric.split('.'):
                try:
                    value = value[key]
                except KeyError:
                    break

            # only record leaf values; 'key' is the last path segment reached
            if not isinstance(value, dict) and key:
                to_netdata[new_name or key] = value if not func else func(value)

        # derived dimension: virtual memory not mapped to data files
        to_netdata['nonmapped'] = to_netdata['virtual'] - serverStatus['mem'].get('mappedWithJournal',
                                                                                 to_netdata['mapped'])

        # wiredTiger cache utilization, pre-scaled x1000 (chart divisor 1000)
        if to_netdata.get('maximum bytes configured'):
            maximum = to_netdata['maximum bytes configured']
            to_netdata['wiredTiger_percent_clean'] = int(to_netdata['bytes currently in the cache']
                                                         * 100 / maximum * 1000)
            to_netdata['wiredTiger_percent_dirty'] = int(to_netdata['tracked dirty bytes in the cache']
                                                         * 100 / maximum * 1000)

        # dbStats: '<dbname>_<metric>' per database
        if dbStats:
            for dbase in dbStats:
                for metric in DBSTATS:
                    key = '_'.join([dbase, metric])
                    to_netdata[key] = dbStats[dbase][metric]

        # replSetGetStatus
        if replSetGetStatus:
            other_hosts = list()
            members = replSetGetStatus['members']
            unix_epoch = datetime(1970, 1, 1, 0, 0)

            for member in members:
                if not member.get('self'):
                    other_hosts.append(member)
                # Replica set time diff between current time and time when last entry from the oplog was applied
                if member.get('optimeDate', unix_epoch) != unix_epoch:
                    member_optimedate = member['name'] + '_optimedate'
                    to_netdata.update({member_optimedate: int(delta_calculation(delta=utc_now - member['optimeDate'],
                                                                                multiplier=1000))})
                # Replica set members state: zero every state dim, then set the active one
                member_state = member['name'] + '_state'
                for elem in REPL_SET_STATES:
                    state = elem[0]
                    to_netdata.update({'_'.join([member_state, state]): 0})
                to_netdata.update({'_'.join([member_state, str(member['state'])]): member['state']})
            # Heartbeat lag calculation
            for other in other_hosts:
                if other['lastHeartbeatRecv'] != unix_epoch:
                    node = other['name'] + '_heartbeat_lag'
                    to_netdata[node] = int(delta_calculation(delta=utc_now - other['lastHeartbeatRecv'],
                                                             multiplier=1000))

            if getReplicationInfo:
                first_event = getReplicationInfo['ASCENDING']['ts'].as_datetime()
                last_event = getReplicationInfo['DESCENDING']['ts'].as_datetime()
                to_netdata['timeDiff'] = int(delta_calculation(delta=last_event - first_event, multiplier=1000))

        return to_netdata

    def _create_connection(self):
        """Connect, optionally authenticate, and probe serverStatus.

        Returns (connection, server_status, None) on success or
        (None, None, error_string) on failure. Database names are collected
        best-effort; a failure there is logged but not fatal.
        """
        conn_vars = {'host': self.host, 'port': self.port}
        # server_selection_timeout exists on pymongo 3.x clients only
        if hasattr(MongoClient, 'server_selection_timeout'):
            conn_vars.update({'serverselectiontimeoutms': self.timeout})
        try:
            connection = MongoClient(**conn_vars)
            if self.user and self.password:
                getattr(connection, self.authdb).authenticate(name=self.user, password=self.password)
            # elif self.user:
            #     connection.admin.authenticate(name=self.user, mechanism='MONGODB-X509')
            server_status = connection.admin.command('serverStatus')
        except PyMongoError as error:
            return None, None, str(error)
        else:
            try:
                self.databases = connection.database_names()
            except PyMongoError as error:
                self.info('Can\'t collect databases: %s' % str(error))
            return connection, server_status, None
  664. def delta_calculation(delta, multiplier=1):
  665. if hasattr(delta, 'total_seconds'):
  666. return delta.total_seconds() * multiplier
  667. return (delta.microseconds + (delta.seconds + delta.days * 24 * 3600) * 10 ** 6) / 10.0 ** 6 * multiplier