mongodb.chart.py 32 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. # -*- coding: utf-8 -*-
  2. # Description: mongodb netdata python.d module
  3. # Author: ilyam8
  4. # SPDX-License-Identifier: GPL-3.0-or-later
  5. import ssl
  6. from copy import deepcopy
  7. from datetime import datetime
  8. from sys import exc_info
  9. try:
  10. from pymongo import MongoClient, ASCENDING, DESCENDING, version_tuple
  11. from pymongo.errors import PyMongoError
  12. PYMONGO = True
  13. except ImportError:
  14. PYMONGO = False
  15. from bases.FrameworkServices.SimpleService import SimpleService
  # Replica set member states as (state code, dimension name) pairs.
  # The codes are kept as strings because they are joined into dimension ids;
  # the list order is preserved as written.
  REPL_SET_STATES = [
      ('1', 'primary'),
      ('8', 'down'),
      ('2', 'secondary'),
      ('3', 'recovering'),
      ('5', 'startup2'),
      ('4', 'fatal'),
      ('7', 'arbiter'),
      ('6', 'unknown'),
      ('9', 'rollback'),
      ('10', 'removed'),
      ('0', 'startup'),
  ]
  def multiply_by_100(value):
      """Return ``value`` scaled by a factor of 100."""
      scaled = value * 100
      return scaled
  # Metrics read from the serverStatus document on every run.
  # Each entry is (dotted path, name to store under or None, transform or None);
  # when the name is None the last path component is used as the key.
  DEFAULT_METRICS = [
      # operation counters
      ('opcounters.delete', None, None),
      ('opcounters.update', None, None),
      ('opcounters.insert', None, None),
      ('opcounters.query', None, None),
      ('opcounters.getmore', None, None),
      # clients and connections
      ('globalLock.activeClients.readers', 'activeClients_readers', None),
      ('globalLock.activeClients.writers', 'activeClients_writers', None),
      ('connections.available', 'connections_available', None),
      ('connections.current', 'connections_current', None),
      # memory
      ('mem.mapped', None, None),
      ('mem.resident', None, None),
      ('mem.virtual', None, None),
      # queued requests
      ('globalLock.currentQueue.readers', 'currentQueue_readers', None),
      ('globalLock.currentQueue.writers', 'currentQueue_writers', None),
      # asserts
      ('asserts.msg', None, None),
      ('asserts.regular', None, None),
      ('asserts.user', None, None),
      ('asserts.warning', None, None),
      # page faults and record moves
      ('extra_info.page_faults', None, None),
      ('metrics.record.moves', None, None),
      # background flushing
      ('backgroundFlushing.average_ms', None, multiply_by_100),
      ('backgroundFlushing.last_ms', None, multiply_by_100),
      ('backgroundFlushing.flushes', None, multiply_by_100),
      # cursors: two alternative locations are listed, both map onto the
      # same 'cursor_total' / 'timedOut' keys
      ('metrics.cursor.timedOut', None, None),
      ('metrics.cursor.open.total', 'cursor_total', None),
      ('metrics.cursor.open.noTimeout', None, None),
      ('cursors.timedOut', None, None),
      ('cursors.totalOpen', 'cursor_total', None),
  ]
  # Journaling ("dur") metrics, in the same (path, name, transform) format
  # as DEFAULT_METRICS.
  DUR = [
      ('dur.commits', None, None),
      ('dur.journaledMB', None, multiply_by_100),
  ]
  # WiredTiger metrics as (dotted path, stored name or None, transform or None).
  WIREDTIGER = [
      # read / write tickets
      ('wiredTiger.concurrentTransactions.read.available', 'wiredTigerRead_available', None),
      ('wiredTiger.concurrentTransactions.read.out', 'wiredTigerRead_out', None),
      ('wiredTiger.concurrentTransactions.write.available', 'wiredTigerWrite_available', None),
      ('wiredTiger.concurrentTransactions.write.out', 'wiredTigerWrite_out', None),
      # cache usage
      ('wiredTiger.cache.bytes currently in the cache', None, None),
      ('wiredTiger.cache.tracked dirty bytes in the cache', None, None),
      ('wiredTiger.cache.maximum bytes configured', None, None),
      # evictions
      ('wiredTiger.cache.unmodified pages evicted', 'unmodified', None),
      ('wiredTiger.cache.modified pages evicted', 'modified', None),
  ]
  # tcmalloc metrics as (dotted path, stored name or None, transform or None);
  # every entry is stored under its last path component.
  TCMALLOC = [
      ('tcmalloc.generic.current_allocated_bytes', None, None),
      ('tcmalloc.generic.heap_size', None, None),
      ('tcmalloc.tcmalloc.central_cache_free_bytes', None, None),
      ('tcmalloc.tcmalloc.current_total_thread_cache_bytes', None, None),
      ('tcmalloc.tcmalloc.pageheap_free_bytes', None, None),
      ('tcmalloc.tcmalloc.pageheap_unmapped_bytes', None, None),
      ('tcmalloc.tcmalloc.thread_cache_free_bytes', None, None),
      ('tcmalloc.tcmalloc.transfer_cache_free_bytes', None, None),
  ]
  # Per-command counters as (dotted path, stored name, transform or None).
  # One "total" and one "failed" entry per command; the stored names are the
  # dimension ids used by the 'command_total_rate' / 'command_failed_rate'
  # charts, which include 'update_total' and 'update_failed'.
  COMMANDS = [
      ('metrics.commands.count.total', 'count_total', None),
      ('metrics.commands.createIndexes.total', 'createIndexes_total', None),
      ('metrics.commands.delete.total', 'delete_total', None),
      ('metrics.commands.eval.total', 'eval_total', None),
      ('metrics.commands.findAndModify.total', 'findAndModify_total', None),
      ('metrics.commands.insert.total', 'insert_total', None),
      # was a second copy of delete.total, leaving 'update_total' uncollected
      ('metrics.commands.update.total', 'update_total', None),
      ('metrics.commands.count.failed', 'count_failed', None),
      ('metrics.commands.createIndexes.failed', 'createIndexes_failed', None),
      ('metrics.commands.delete.failed', 'delete_failed', None),
      ('metrics.commands.eval.failed', 'eval_failed', None),
      ('metrics.commands.findAndModify.failed', 'findAndModify_failed', None),
      ('metrics.commands.insert.failed', 'insert_failed', None),
      # was a second copy of delete.failed, leaving 'update_failed' uncollected
      ('metrics.commands.update.failed', 'update_failed', None),
  ]
  # Lock acquisition counters as (dotted path, stored name, transform or None).
  # The stored name is '<resource>_<mode letter>'.
  LOCKS = [
      # Collection
      ('locks.Collection.acquireCount.R', 'Collection_R', None),
      ('locks.Collection.acquireCount.r', 'Collection_r', None),
      ('locks.Collection.acquireCount.W', 'Collection_W', None),
      ('locks.Collection.acquireCount.w', 'Collection_w', None),
      # Database
      ('locks.Database.acquireCount.R', 'Database_R', None),
      ('locks.Database.acquireCount.r', 'Database_r', None),
      ('locks.Database.acquireCount.W', 'Database_W', None),
      ('locks.Database.acquireCount.w', 'Database_w', None),
      # Global
      ('locks.Global.acquireCount.R', 'Global_R', None),
      ('locks.Global.acquireCount.r', 'Global_r', None),
      ('locks.Global.acquireCount.W', 'Global_W', None),
      ('locks.Global.acquireCount.w', 'Global_w', None),
      # Metadata
      ('locks.Metadata.acquireCount.R', 'Metadata_R', None),
      ('locks.Metadata.acquireCount.w', 'Metadata_w', None),
      # oplog
      ('locks.oplog.acquireCount.r', 'oplog_r', None),
      ('locks.oplog.acquireCount.w', 'oplog_w', None),
  ]
  # Fields taken from each database's dbStats document.
  DBSTATS = [
      'dataSize',
      'indexSize',
      'storageSize',
      'objects',
  ]
  # Chart order; it can be overridden to show fewer charts or a different order.
  ORDER = [
      # throughput
      'read_operations',
      'write_operations',
      'active_clients',
      # database performance
      'journaling_transactions',
      'journaling_volume',
      'background_flush_average',
      'background_flush_last',
      'background_flush_rate',
      'wiredtiger_read',
      'wiredtiger_write',
      'cursors',
      # resource utilization / saturation
      'connections',
      'memory',
      'page_faults',
      'queued_requests',
      'record_moves',
      'wiredtiger_cache',
      'wiredtiger_pages_evicted',
      # errors
      'asserts',
      # locks
      'locks_collection',
      'locks_database',
      'locks_global',
      'locks_metadata',
      'locks_oplog',
      # storage, allocator, commands
      'dbstats_objects',
      'tcmalloc_generic',
      'tcmalloc_metrics',
      'command_total_rate',
      'command_failed_rate',
  ]
  # Static chart definitions keyed by chart name.
  #   'options': [name, title, units, family, context, chart type]
  #   'lines':   [dimension id, dimension name, algorithm, multiplier, divisor]
  #              (trailing items may be omitted)
  # Dimension ids are the keys of the collected data dict.
  CHARTS = {
      'read_operations': {
          'options': [None, 'Received read requests', 'requests/s', 'throughput metrics',
                      'mongodb.read_operations', 'line'],
          'lines': [
              ['query', None, 'incremental'],
              ['getmore', None, 'incremental'],
          ],
      },
      'write_operations': {
          'options': [None, 'Received write requests', 'requests/s', 'throughput metrics',
                      'mongodb.write_operations', 'line'],
          'lines': [
              ['insert', None, 'incremental'],
              ['update', None, 'incremental'],
              ['delete', None, 'incremental'],
          ],
      },
      'active_clients': {
          'options': [None, 'Clients with read or write operations in progress or queued', 'clients',
                      'throughput metrics', 'mongodb.active_clients', 'line'],
          'lines': [
              ['activeClients_readers', 'readers', 'absolute'],
              ['activeClients_writers', 'writers', 'absolute'],
          ],
      },
      'journaling_transactions': {
          'options': [None, 'Transactions that have been written to the journal', 'commits',
                      'database performance', 'mongodb.journaling_transactions', 'line'],
          'lines': [
              ['commits', None, 'absolute'],
          ],
      },
      'journaling_volume': {
          'options': [None, 'Volume of data written to the journal', 'MiB', 'database performance',
                      'mongodb.journaling_volume', 'line'],
          'lines': [
              ['journaledMB', 'volume', 'absolute', 1, 100],
          ],
      },
      'background_flush_average': {
          'options': [None, 'Average time taken by flushes to execute', 'milliseconds', 'database performance',
                      'mongodb.background_flush_average', 'line'],
          'lines': [
              ['average_ms', 'time', 'absolute', 1, 100],
          ],
      },
      'background_flush_last': {
          'options': [None, 'Time taken by the last flush operation to execute', 'milliseconds',
                      'database performance', 'mongodb.background_flush_last', 'line'],
          'lines': [
              ['last_ms', 'time', 'absolute', 1, 100],
          ],
      },
      'background_flush_rate': {
          'options': [None, 'Flushes rate', 'flushes', 'database performance', 'mongodb.background_flush_rate',
                      'line'],
          'lines': [
              # 'flushes' is collected multiplied by 100, like the other
              # backgroundFlushing values, so it needs the same divisor
              # (it was 1, which charted 100x the real rate).
              ['flushes', 'flushes', 'incremental', 1, 100],
          ],
      },
      'wiredtiger_read': {
          'options': [None, 'Read tickets in use and remaining', 'tickets', 'database performance',
                      'mongodb.wiredtiger_read', 'stacked'],
          'lines': [
              ['wiredTigerRead_available', 'available', 'absolute', 1, 1],
              ['wiredTigerRead_out', 'inuse', 'absolute', 1, 1],
          ],
      },
      'wiredtiger_write': {
          'options': [None, 'Write tickets in use and remaining', 'tickets', 'database performance',
                      'mongodb.wiredtiger_write', 'stacked'],
          'lines': [
              ['wiredTigerWrite_available', 'available', 'absolute', 1, 1],
              ['wiredTigerWrite_out', 'inuse', 'absolute', 1, 1],
          ],
      },
      'cursors': {
          'options': [None, 'Currently opened cursors, cursors with timeout disabled and timed out cursors',
                      'cursors', 'database performance', 'mongodb.cursors', 'stacked'],
          'lines': [
              ['cursor_total', 'opened', 'absolute', 1, 1],
              ['noTimeout', None, 'absolute', 1, 1],
              ['timedOut', None, 'incremental', 1, 1],
          ],
      },
      'connections': {
          'options': [None, 'Currently connected clients and unused connections', 'connections',
                      'resource utilization', 'mongodb.connections', 'stacked'],
          'lines': [
              ['connections_available', 'unused', 'absolute', 1, 1],
              ['connections_current', 'connected', 'absolute', 1, 1],
          ],
      },
      'memory': {
          'options': [None, 'Memory metrics', 'MiB', 'resource utilization', 'mongodb.memory', 'stacked'],
          'lines': [
              ['virtual', None, 'absolute', 1, 1],
              ['resident', None, 'absolute', 1, 1],
              ['nonmapped', None, 'absolute', 1, 1],
              ['mapped', None, 'absolute', 1, 1],
          ],
      },
      'page_faults': {
          'options': [None, 'Number of times MongoDB had to fetch data from disk', 'request/s',
                      'resource utilization', 'mongodb.page_faults', 'line'],
          'lines': [
              ['page_faults', None, 'incremental', 1, 1],
          ],
      },
      'queued_requests': {
          'options': [None, 'Currently queued read and write requests', 'requests', 'resource saturation',
                      'mongodb.queued_requests', 'line'],
          'lines': [
              ['currentQueue_readers', 'readers', 'absolute', 1, 1],
              ['currentQueue_writers', 'writers', 'absolute', 1, 1],
          ],
      },
      'record_moves': {
          'options': [None, 'Number of times documents had to be moved on-disk', 'number',
                      'resource saturation', 'mongodb.record_moves', 'line'],
          'lines': [
              ['moves', None, 'incremental', 1, 1],
          ],
      },
      'asserts': {
          'options': [
              None,
              'Number of message, warning, regular, corresponding to errors generated by users assertions raised',
              'number', 'errors (asserts)', 'mongodb.asserts', 'line'],
          'lines': [
              ['msg', None, 'incremental', 1, 1],
              ['warning', None, 'incremental', 1, 1],
              ['regular', None, 'incremental', 1, 1],
              ['user', None, 'incremental', 1, 1],
          ],
      },
      'wiredtiger_cache': {
          'options': [None, 'The percentage of the wiredTiger cache that is in use and cache with dirty bytes',
                      'percentage', 'resource utilization', 'mongodb.wiredtiger_cache', 'stacked'],
          'lines': [
              ['wiredTiger_percent_clean', 'inuse', 'absolute', 1, 1000],
              ['wiredTiger_percent_dirty', 'dirty', 'absolute', 1, 1000],
          ],
      },
      'wiredtiger_pages_evicted': {
          'options': [None, 'Pages evicted from the cache',
                      'pages', 'resource utilization', 'mongodb.wiredtiger_pages_evicted', 'stacked'],
          'lines': [
              ['unmodified', None, 'absolute', 1, 1],
              ['modified', None, 'absolute', 1, 1],
          ],
      },
      'dbstats_objects': {
          'options': [None, 'Number of documents in the database among all the collections', 'documents',
                      'storage size metrics', 'mongodb.dbstats_objects', 'stacked'],
          # starts empty here; no dimensions are defined statically
          'lines': [],
      },
      'tcmalloc_generic': {
          'options': [None, 'Tcmalloc generic metrics', 'MiB', 'tcmalloc', 'mongodb.tcmalloc_generic', 'stacked'],
          'lines': [
              ['current_allocated_bytes', 'allocated', 'absolute', 1, 1 << 20],
              ['heap_size', 'heap_size', 'absolute', 1, 1 << 20],
          ],
      },
      'tcmalloc_metrics': {
          'options': [None, 'Tcmalloc metrics', 'KiB', 'tcmalloc', 'mongodb.tcmalloc_metrics', 'stacked'],
          'lines': [
              ['central_cache_free_bytes', 'central_cache_free', 'absolute', 1, 1024],
              ['current_total_thread_cache_bytes', 'current_total_thread_cache', 'absolute', 1, 1024],
              ['pageheap_free_bytes', 'pageheap_free', 'absolute', 1, 1024],
              ['pageheap_unmapped_bytes', 'pageheap_unmapped', 'absolute', 1, 1024],
              ['thread_cache_free_bytes', 'thread_cache_free', 'absolute', 1, 1024],
              ['transfer_cache_free_bytes', 'transfer_cache_free', 'absolute', 1, 1024],
          ],
      },
      'command_total_rate': {
          'options': [None, 'Commands total rate', 'commands/s', 'commands', 'mongodb.command_total_rate',
                      'stacked'],
          'lines': [
              ['count_total', 'count', 'incremental', 1, 1],
              ['createIndexes_total', 'createIndexes', 'incremental', 1, 1],
              ['delete_total', 'delete', 'incremental', 1, 1],
              ['eval_total', 'eval', 'incremental', 1, 1],
              ['findAndModify_total', 'findAndModify', 'incremental', 1, 1],
              ['insert_total', 'insert', 'incremental', 1, 1],
              ['update_total', 'update', 'incremental', 1, 1],
          ],
      },
      'command_failed_rate': {
          'options': [None, 'Commands failed rate', 'commands/s', 'commands', 'mongodb.command_failed_rate',
                      'stacked'],
          'lines': [
              ['count_failed', 'count', 'incremental', 1, 1],
              ['createIndexes_failed', 'createIndexes', 'incremental', 1, 1],
              ['delete_failed', 'delete', 'incremental', 1, 1],
              ['eval_failed', 'eval', 'incremental', 1, 1],
              ['findAndModify_failed', 'findAndModify', 'incremental', 1, 1],
              ['insert_failed', 'insert', 'incremental', 1, 1],
              ['update_failed', 'update', 'incremental', 1, 1],
          ],
      },
      'locks_collection': {
          'options': [None, 'Collection lock. Number of times the lock was acquired in the specified mode',
                      'locks', 'locks metrics', 'mongodb.locks_collection', 'stacked'],
          'lines': [
              ['Collection_R', 'shared', 'incremental'],
              ['Collection_W', 'exclusive', 'incremental'],
              ['Collection_r', 'intent_shared', 'incremental'],
              ['Collection_w', 'intent_exclusive', 'incremental'],
          ],
      },
      'locks_database': {
          'options': [None, 'Database lock. Number of times the lock was acquired in the specified mode',
                      'locks', 'locks metrics', 'mongodb.locks_database', 'stacked'],
          'lines': [
              ['Database_R', 'shared', 'incremental'],
              ['Database_W', 'exclusive', 'incremental'],
              ['Database_r', 'intent_shared', 'incremental'],
              ['Database_w', 'intent_exclusive', 'incremental'],
          ],
      },
      'locks_global': {
          'options': [None, 'Global lock. Number of times the lock was acquired in the specified mode',
                      'locks', 'locks metrics', 'mongodb.locks_global', 'stacked'],
          'lines': [
              ['Global_R', 'shared', 'incremental'],
              ['Global_W', 'exclusive', 'incremental'],
              ['Global_r', 'intent_shared', 'incremental'],
              ['Global_w', 'intent_exclusive', 'incremental'],
          ],
      },
      'locks_metadata': {
          'options': [None, 'Metadata lock. Number of times the lock was acquired in the specified mode',
                      'locks', 'locks metrics', 'mongodb.locks_metadata', 'stacked'],
          'lines': [
              ['Metadata_R', 'shared', 'incremental'],
              ['Metadata_w', 'intent_exclusive', 'incremental'],
          ],
      },
      'locks_oplog': {
          'options': [None, 'Lock on the oplog. Number of times the lock was acquired in the specified mode',
                      'locks', 'locks metrics', 'mongodb.locks_oplog', 'stacked'],
          'lines': [
              ['oplog_r', 'intent_shared', 'incremental'],
              ['oplog_w', 'intent_exclusive', 'incremental'],
          ],
      },
  }
  # Defaults used when the job configuration omits the option.
  DEFAULT_HOST = '127.0.0.1'
  DEFAULT_PORT = 27017
  DEFAULT_TIMEOUT = 100  # passed as serverselectiontimeoutms
  DEFAULT_AUTHDB = 'admin'

  # Connection parameter names: general.
  CONN_PARAM_HOST = 'host'
  CONN_PARAM_PORT = 'port'
  CONN_PARAM_SERVER_SELECTION_TIMEOUT_MS = 'serverselectiontimeoutms'

  # Connection parameter names: SSL.
  CONN_PARAM_SSL_SSL = 'ssl'
  CONN_PARAM_SSL_CERT_REQS = 'ssl_cert_reqs'
  CONN_PARAM_SSL_CA_CERTS = 'ssl_ca_certs'
  CONN_PARAM_SSL_CRL_FILE = 'ssl_crlfile'
  CONN_PARAM_SSL_CERT_FILE = 'ssl_certfile'
  CONN_PARAM_SSL_KEY_FILE = 'ssl_keyfile'
  CONN_PARAM_SSL_PEM_PASSPHRASE = 'ssl_pem_passphrase'
  class Service(SimpleService):
      """MongoDB collector for the netdata python.d plugin.

      On ``check()`` the service connects to the server, decides from the
      serverStatus document which optional metric groups to collect, and
      builds the chart definitions. ``_get_data()`` then runs the MongoDB
      commands and flattens their output into a ``{dimension id: value}`` dict.

      Configuration keys read here: ``host``, ``port``, ``timeout``,
      ``authdb``, ``user``, ``pass`` and the ``ssl*`` options.
      """

      def __init__(self, configuration=None, name=None):
          SimpleService.__init__(self, configuration=configuration, name=name)
          # Per-instance copies: both are modified in create_charts_().
          self.order = ORDER[:]
          self.definitions = deepcopy(CHARTS)
          self.authdb = self.configuration.get('authdb', DEFAULT_AUTHDB)
          self.user = self.configuration.get('user')
          self.password = self.configuration.get('pass')
          self.metrics_to_collect = deepcopy(DEFAULT_METRICS)
          self.connection = None
          self.do_replica = None
          self.databases = list()

      def check(self):
          """Connect, collect once and build the charts.

          :return: True when a non-empty data dict was collected, else False.
          """
          if not PYMONGO:
              self.error('Pymongo package v2.4+ is needed to use mongodb.chart.py')
              return False

          self.connection, server_status, error = self._create_connection()
          if error:
              self.error(error)
              return False

          self.build_metrics_to_collect_(server_status)

          try:
              data = self._get_data()
          except (LookupError, SyntaxError, AttributeError):
              self.error('Type: %s, error: %s' % (str(exc_info()[0]), str(exc_info()[1])))
              return False

          if isinstance(data, dict) and data:
              self._data_from_check = data
              self.create_charts_(server_status)
              return True

          self.error('_get_data() returned no data or type is not <dict>')
          return False

      def build_metrics_to_collect_(self, server_status):
          """Extend the metric list with the optional groups the server exposes.

          :param server_status: serverStatus document (dict-like).
          """
          self.do_replica = 'repl' in server_status

          if 'dur' in server_status:
              self.metrics_to_collect.extend(DUR)
          if 'tcmalloc' in server_status:
              self.metrics_to_collect.extend(TCMALLOC)
          # .get() so that a document without 'metrics' / 'locks' does not raise
          if 'commands' in server_status.get('metrics', dict()):
              self.metrics_to_collect.extend(COMMANDS)
          if 'wiredTiger' in server_status:
              self.metrics_to_collect.extend(WIREDTIGER)
          if 'Collection' in server_status.get('locks', dict()):
              self.metrics_to_collect.extend(LOCKS)

      def create_charts_(self, server_status):
          """Remove charts the server has no data for and add the dynamic ones.

          :param server_status: serverStatus document (dict-like).
          """
          def drop(*chart_names):
              # Tolerate names that are already gone instead of raising ValueError.
              for chart_name in chart_names:
                  if chart_name in self.order:
                      self.order.remove(chart_name)

          if 'dur' not in server_status:
              drop('journaling_transactions', 'journaling_volume')
          if 'backgroundFlushing' not in server_status:
              drop('background_flush_average', 'background_flush_last', 'background_flush_rate')
          if 'wiredTiger' not in server_status:
              # 'wiredtiger_pages_evicted' is fed from WIREDTIGER metrics as well,
              # so it is dropped together with the other wiredTiger charts.
              drop('wiredtiger_write', 'wiredtiger_read', 'wiredtiger_cache', 'wiredtiger_pages_evicted')
          if 'tcmalloc' not in server_status:
              drop('tcmalloc_generic', 'tcmalloc_metrics')
          if 'commands' not in server_status.get('metrics', dict()):
              drop('command_total_rate', 'command_failed_rate')

          locks = server_status.get('locks', dict())
          if 'Collection' not in locks:
              drop('locks_collection', 'locks_database', 'locks_global', 'locks_metadata')
          if 'oplog' not in locks:
              drop('locks_oplog')

          # One size chart per database plus one dimension on 'dbstats_objects'.
          for dbase in self.databases:
              chart_name = '_'.join([dbase, 'dbstats'])
              self.order.append(chart_name)
              self.definitions[chart_name] = {
                  'options': [None, '%s: size of all documents, indexes, extents' % dbase, 'KB',
                              'storage size metrics', 'mongodb.dbstats', 'line'],
                  'lines': [
                      ['_'.join([dbase, 'dataSize']), 'documents', 'absolute', 1, 1024],
                      ['_'.join([dbase, 'indexSize']), 'indexes', 'absolute', 1, 1024],
                      ['_'.join([dbase, 'storageSize']), 'extents', 'absolute', 1, 1024]
                  ]}
              self.definitions['dbstats_objects']['lines'].append(
                  ['_'.join([dbase, 'objects']), dbase, 'absolute'])

          if self.do_replica:
              self._create_replica_charts(server_status['repl'])

      def _create_replica_charts(self, repl):
          """Add the replication charts described by the serverStatus 'repl' section."""
          def create_lines(hosts, suffix):
              # Values are collected in milliseconds, hence the divisor of 1000.
              return [['_'.join([host, suffix]), host, 'absolute', 1, 1000] for host in hosts]

          def create_state_lines(host, states):
              # The host is passed explicitly instead of being picked up from
              # the caller's loop variable.
              return [['_'.join([host, 'state', state]), description, 'absolute', 1, 1]
                      for state, description in states]

          all_hosts = list(repl.get('hosts', list())) + list(repl.get('arbiters', list()))
          this_host = repl.get('me')
          other_hosts = [host for host in all_hosts if host != this_host]

          if 'local' in self.databases:
              self.order.append('oplog_window')
              self.definitions['oplog_window'] = {
                  'options': [None, 'Interval of time between the oldest and the latest entries in the oplog',
                              'seconds', 'replication and oplog', 'mongodb.oplog_window', 'line'],
                  'lines': [['timeDiff', 'window', 'absolute', 1, 1000]]}

          # "heartbeat delay" chart
          self.order.append('heartbeat_delay')
          self.definitions['heartbeat_delay'] = {
              'options': [
                  None,
                  'Time when last heartbeat was received from the replica set member (lastHeartbeatRecv)',
                  'seconds ago', 'replication and oplog', 'mongodb.replication_heartbeat_delay', 'stacked'],
              'lines': create_lines(other_hosts, 'heartbeat_lag')}

          # "optimedate delay" chart
          self.order.append('optimedate_delay')
          self.definitions['optimedate_delay'] = {
              'options': [None, 'Time when last entry from the oplog was applied (optimeDate)',
                          'seconds ago', 'replication and oplog', 'mongodb.replication_optimedate_delay',
                          'stacked'],
              'lines': create_lines(all_hosts, 'optimedate')}

          # one "replica set member state" chart per member
          for host in all_hosts:
              chart_name = '_'.join([host, 'state'])
              self.order.append(chart_name)
              self.definitions[chart_name] = {
                  'options': [None, 'Replica set member (%s) current state' % host, 'state',
                              'replication and oplog', 'mongodb.replication_state', 'line'],
                  'lines': create_state_lines(host, REPL_SET_STATES)}

      def _get_raw_data(self):
          """Run every applicable command and merge the results.

          :return: dict keyed by command name, or None when nothing was collected.
          """
          raw_data = dict()
          for collect in (self.get_server_status,
                          self.get_db_stats,
                          self.get_repl_set_get_status,
                          self.get_get_replication_info):
              raw_data.update(collect() or dict())
          return raw_data or None

      def get_server_status(self):
          """:return: {'serverStatus': document} or None on a PyMongo error."""
          try:
              return {'serverStatus': self.connection.admin.command('serverStatus')}
          except PyMongoError:
              return None

      def get_db_stats(self):
          """:return: {'dbStats': {database: document}} or None."""
          if not self.databases:
              return None

          stats = dict()
          try:
              for dbase in self.databases:
                  stats[dbase] = self.connection[dbase].command('dbStats')
          except PyMongoError:
              return None
          return {'dbStats': stats}

      def get_repl_set_get_status(self):
          """:return: {'replSetGetStatus': document} or None."""
          if not self.do_replica:
              return None

          try:
              return {'replSetGetStatus': self.connection.admin.command('replSetGetStatus')}
          except PyMongoError:
              return None

      def get_get_replication_info(self):
          """Fetch the first and last oplog entries (in $natural order).

          :return: {'getReplicationInfo': {'ASCENDING': doc, 'DESCENDING': doc}} or None.
          """
          if not (self.do_replica and 'local' in self.databases):
              return None

          oplog = self.connection.local.oplog.rs
          try:
              return {
                  'getReplicationInfo': {
                      'ASCENDING': oplog.find().sort('$natural', ASCENDING).limit(1)[0],
                      'DESCENDING': oplog.find().sort('$natural', DESCENDING).limit(1)[0],
                  }
              }
          except (PyMongoError, IndexError):
              # IndexError: indexing [0] on a cursor with no documents (empty oplog)
              return None

      def _get_data(self):
          """Collect and flatten all metrics.

          :return: dict of {dimension id: value}, or None when serverStatus
                   could not be collected.
          """
          raw_data = self._get_raw_data()
          if not raw_data:
              return None

          # The other commands may have succeeded while serverStatus failed;
          # without serverStatus there is nothing meaningful to report.
          server_status = raw_data.get('serverStatus')
          if not server_status:
              return None

          data = dict()
          self._collect_server_status(data, server_status)
          self._collect_db_stats(data, raw_data.get('dbStats'))
          self._collect_repl_set_status(data, raw_data.get('replSetGetStatus'))
          self._collect_replication_info(data, raw_data.get('getReplicationInfo'))
          return data

      def _collect_server_status(self, data, server_status):
          """Store every configured serverStatus metric plus the derived values."""
          for metric, new_name, func in self.metrics_to_collect:
              value = server_status
              for key in metric.split('.'):
                  try:
                      value = value[key]
                  except (KeyError, TypeError):
                      # path is not present on this server: skip the metric
                      break
              else:
                  if not isinstance(value, dict):
                      data[new_name or key] = func(value) if func else value

          mem = server_status.get('mem', dict())
          if 'mapped' in mem and 'virtual' in data and 'mapped' in data:
              data['nonmapped'] = data['virtual'] - mem.get('mappedWithJournal', data['mapped'])

          maximum = data.get('maximum bytes configured')
          if maximum:
              # percentages are stored multiplied by 1000 (chart divisor is 1000)
              data['wiredTiger_percent_clean'] = int(
                  data['bytes currently in the cache'] * 100 / maximum * 1000)
              data['wiredTiger_percent_dirty'] = int(
                  data['tracked dirty bytes in the cache'] * 100 / maximum * 1000)

      @staticmethod
      def _collect_db_stats(data, db_stats):
          """Store the DBSTATS fields of every database as '<database>_<field>'."""
          if not db_stats:
              return
          for dbase, stats in db_stats.items():
              for metric in DBSTATS:
                  if metric in stats:
                      data['_'.join([dbase, metric])] = stats[metric]

      @staticmethod
      def _collect_repl_set_status(data, repl_set_status):
          """Store per-member optime lag, state flags and heartbeat lag (milliseconds)."""
          if not repl_set_status:
              return

          # naive datetime, compared against the datetimes in the reply
          # NOTE(review): assumes the reply carries naive UTC datetimes - confirm
          utc_now = datetime.utcnow()
          unix_epoch = datetime(1970, 1, 1, 0, 0)

          for member in repl_set_status['members']:
              name = member['name']

              # time since the last oplog entry was applied on this member
              if member.get('optimeDate', unix_epoch) != unix_epoch:
                  delta = utc_now - member['optimeDate']
                  data[name + '_optimedate'] = int(delta_calculation(delta=delta, multiplier=1000))

              # member state: every known state is 0 except the current one
              member_state = name + '_state'
              for state, _ in REPL_SET_STATES:
                  data['_'.join([member_state, state])] = 0
              data['_'.join([member_state, str(member['state'])])] = member['state']

              # heartbeat lag, for members other than this one
              if not member.get('self'):
                  last_heartbeat = member.get('lastHeartbeatRecv', unix_epoch)
                  if last_heartbeat != unix_epoch:
                      delta = utc_now - last_heartbeat
                      data[name + '_heartbeat_lag'] = int(delta_calculation(delta=delta, multiplier=1000))

      @staticmethod
      def _collect_replication_info(data, replication_info):
          """Store the oplog window (last entry time - first entry time) in milliseconds."""
          if not replication_info:
              return
          first_event = replication_info['ASCENDING']['ts'].as_datetime()
          last_event = replication_info['DESCENDING']['ts'].as_datetime()
          data['timeDiff'] = int(delta_calculation(delta=last_event - first_event, multiplier=1000))

      def build_ssl_connection_params(self):
          """:return: dict with only the SSL connection options set in the configuration."""
          conf = self.configuration

          def cert_req(v):
              # None means "not configured"; any other value is mapped by truthiness
              if v is None:
                  return None
              if not v:
                  return ssl.CERT_NONE
              return ssl.CERT_REQUIRED

          ssl_params = {
              CONN_PARAM_SSL_SSL: conf.get(CONN_PARAM_SSL_SSL),
              CONN_PARAM_SSL_CERT_REQS: cert_req(conf.get(CONN_PARAM_SSL_CERT_REQS)),
              CONN_PARAM_SSL_CA_CERTS: conf.get(CONN_PARAM_SSL_CA_CERTS),
              CONN_PARAM_SSL_CRL_FILE: conf.get(CONN_PARAM_SSL_CRL_FILE),
              CONN_PARAM_SSL_CERT_FILE: conf.get(CONN_PARAM_SSL_CERT_FILE),
              CONN_PARAM_SSL_KEY_FILE: conf.get(CONN_PARAM_SSL_KEY_FILE),
              CONN_PARAM_SSL_PEM_PASSPHRASE: conf.get(CONN_PARAM_SSL_PEM_PASSPHRASE),
          }

          return dict((k, v) for k, v in ssl_params.items() if v is not None)

      def build_connection_params(self):
          """:return: dict of keyword arguments for MongoClient."""
          conf = self.configuration
          params = {
              CONN_PARAM_HOST: conf.get(CONN_PARAM_HOST, DEFAULT_HOST),
              CONN_PARAM_PORT: conf.get(CONN_PARAM_PORT, DEFAULT_PORT),
          }
          if hasattr(MongoClient, 'server_selection_timeout') or version_tuple[0] >= 4:
              params[CONN_PARAM_SERVER_SELECTION_TIMEOUT_MS] = conf.get('timeout', DEFAULT_TIMEOUT)

          params.update(self.build_ssl_connection_params())
          return params

      def _create_connection(self):
          """Connect, authenticate when credentials are set, and fetch serverStatus.

          Also fills ``self.databases``; a failure to list databases is only logged.

          :return: (connection, serverStatus document, None) on success,
                   (None, None, error message) on a PyMongo error.
          """
          params = self.build_connection_params()
          # only the parameter names are logged, never their values
          self.debug('creating connection, connection params: {0}'.format(sorted(params)))

          use_auth = bool(self.user and self.password)
          # Database.authenticate() does not exist in PyMongo 4+; credentials
          # have to be passed to the MongoClient constructor instead.
          auth_via_client = use_auth and version_tuple[0] >= 4
          if auth_via_client:
              params.update(username=self.user, password=self.password, authSource=self.authdb)

          try:
              connection = MongoClient(**params)
              if use_auth:
                  # the password is deliberately kept out of the log
                  self.debug('authenticating, user: {0}'.format(self.user))
                  if not auth_via_client:
                      getattr(connection, self.authdb).authenticate(name=self.user, password=self.password)
              else:
                  # NOTE: a user without a password (e.g. MONGODB-X509) is not supported
                  self.debug('skip authenticating, user and password are not set')
              server_status = connection.admin.command('serverStatus')
          except PyMongoError as error:
              return None, None, str(error)

          try:
              # Checked on the class: attribute access on a client instance
              # resolves unknown names to databases, so hasattr() on the
              # instance would always be true.
              if hasattr(MongoClient, 'list_database_names'):
                  self.databases = connection.list_database_names()
              else:
                  self.databases = connection.database_names()
          except PyMongoError as error:
              self.info('Can\'t collect databases: %s' % str(error))

          return connection, server_status, None
  def delta_calculation(delta, multiplier=1):
      """Return the length of ``delta`` in seconds, scaled by ``multiplier``.

      :param delta: timedelta-like object.
      :param multiplier: factor applied to the number of seconds.
      :return: float.
      """
      if not hasattr(delta, 'total_seconds'):
          # Fallback for objects without total_seconds(): rebuild the value
          # from the days / seconds / microseconds fields.
          whole_seconds = delta.seconds + delta.days * 24 * 3600
          total_microseconds = delta.microseconds + whole_seconds * 10 ** 6
          return total_microseconds / 10.0 ** 6 * multiplier

      return delta.total_seconds() * multiplier