procmon.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. # -*- test-case-name: twisted.runner.test.test_procmon -*-
  2. # Copyright (c) Twisted Matrix Laboratories.
  3. # See LICENSE for details.
  4. """
  5. Support for starting, monitoring, and restarting child process.
  6. """
  7. import attr
  8. import incremental
  9. from twisted.python import deprecate
  10. from twisted.internet import error, protocol, reactor as _reactor
  11. from twisted.application import service
  12. from twisted.protocols import basic
  13. from twisted.logger import Logger
  @attr.s(frozen=True)
  class _Process(object):
      """
      Immutable description of how a monitored process is to be launched.

      @ivar args: the argv of the command; the first element is the name of
          the command itself
      @type args: C{list}

      @ivar uid: user-id the process runs as; L{None} means inherit the uid
      @type uid: C{int}

      @ivar gid: group-id the process runs as; L{None} means inherit the gid
      @type gid: C{int}

      @ivar env: environment handed to the process
      @type env: C{dict}

      @ivar cwd: initial working directory of the process; L{None} means
          inherit the cwd
      @type cwd: C{str}
      """

      # Field order is significant: attrs derives the positional constructor
      # signature from it.
      args = attr.ib()
      uid = attr.ib(default=None)
      gid = attr.ib(default=None)
      env = attr.ib(default=attr.Factory(dict))
      cwd = attr.ib(default=None)

      @deprecate.deprecated(incremental.Version('Twisted', 18, 7, 0))
      def toTuple(self):
          """
          Render this process in the legacy tuple layout.

          Earlier code stored processes as plain tuples, and some users
          inspected them directly.  That was only ever an accidental feature
          and it is going away: the process list inside the process monitor
          is not a public API.  If you need to remember which processes were
          added to a monitor, record that yourself when you add them.
          Keeping the list private lets its internal structure change when
          bug fixes or new features call for it.

          @return: C{(args, uid, gid, env)}; note that C{cwd} is not part of
              the legacy layout.
          @rtype: C{tuple}
          """
          legacyLayout = (self.args, self.uid, self.gid, self.env)
          return legacyLayout
  class DummyTransport:
      """
      Bare-bones transport stand-in exposing only a C{disconnecting} flag.
      """

      # Always falsy.  Presumably consulted by the line receivers this
      # transport is attached to -- confirm against LineReceiver.
      disconnecting = 0


  # Single shared instance; LoggingProtocol.connectionMade passes it to
  # makeConnection() of the line loggers it creates.
  transport = DummyTransport()
  class LineLogger(basic.LineReceiver):
      """
      Line receiver that writes every complete line it receives to the
      logger of the service it belongs to.

      @ivar tag: label placed in front of each logged line
      @ivar stream: name of the stream the lines come from; attached to each
          log event as the C{stream} field
      @ivar delimiter: lines are split on a bare newline
      @ivar service: object whose C{log} attribute receives the lines
      """

      tag = None
      stream = None
      delimiter = b'\n'
      service = None

      def lineReceived(self, line):
          """
          Log a single line.

          @param line: the received line, without its delimiter
          @type line: C{bytes}
          """
          try:
              text = line.decode('utf-8')
          except UnicodeDecodeError:
              # Not valid UTF-8: log the repr of the raw bytes instead.
              text = repr(line)
          logger = self.service.log
          logger.info(u'[{tag}] {line}',
                      tag=self.tag,
                      line=text,
                      stream=self.stream)
  class LoggingProtocol(protocol.ProcessProtocol):
      """
      Process protocol that logs a child's stdout and stderr line by line and
      tells the owning service when the child has ended.

      @ivar service: the object to notify through C{connectionLost(name)};
          its C{log} attribute is used by the line loggers
      @ivar name: the name of the monitored process; also used as the log tag
      """

      service = None
      name = None

      def _makeLineLogger(self, stream):
          """
          Build a L{LineLogger} for one output stream of this process.

          @param stream: the stream name recorded with every logged line
          @type stream: C{str}

          @return: the configured, not yet connected, line logger
          """
          lineLogger = LineLogger()
          lineLogger.tag = self.name
          lineLogger.stream = stream
          lineLogger.service = self.service
          return lineLogger

      def connectionMade(self):
          """
          Create and connect one line logger each for stdout and stderr.
          """
          self._output = self._makeLineLogger('stdout')
          self._outputEmpty = True
          self._error = self._makeLineLogger('stderr')
          self._errorEmpty = True
          self._output.makeConnection(transport)
          self._error.makeConnection(transport)

      def outReceived(self, data):
          """
          Forward a chunk of stdout and remember whether it ended a line.

          @param data: bytes written by the child to stdout
          @type data: C{bytes}
          """
          self._output.dataReceived(data)
          # endswith() rather than data[-1] == b'\n': indexing bytes yields an
          # int on Python 3, so that comparison could never be true there.  An
          # empty chunk leaves the previous state untouched.
          if data:
              self._outputEmpty = data.endswith(b'\n')

      def errReceived(self, data):
          """
          Forward a chunk of stderr and remember whether it ended a line.

          @param data: bytes written by the child to stderr
          @type data: C{bytes}
          """
          self._error.dataReceived(data)
          # Same reasoning as in outReceived.
          if data:
              self._errorEmpty = data.endswith(b'\n')

      def processEnded(self, reason):
          """
          Flush any unterminated last line and notify the service.

          @param reason: why the process ended (not used here)
          """
          # Terminate a dangling partial line so that it still gets logged.
          if not self._outputEmpty:
              self._output.dataReceived(b'\n')
          if not self._errorEmpty:
              self._error.dataReceived(b'\n')
          self.service.connectionLost(self.name)

      @property
      def output(self):
          """
          The line logger attached to stdout.
          """
          return self._output

      @property
      def empty(self):
          """
          Whether the most recent stdout data ended with a newline (also true
          before any stdout data has arrived).
          """
          return self._outputEmpty
  class ProcessMonitor(service.Service):
      """
      ProcessMonitor runs processes, monitors their progress, and restarts
      them when they die.

      The ProcessMonitor will not attempt to restart a process that appears to
      die instantly -- with each "instant" death (less than 1 second, by
      default), it will delay approximately twice as long before restarting
      it.  A successful run will reset the counter.

      The primary interface is L{addProcess} and L{removeProcess}.  When the
      service is running (that is, when the application it is attached to is
      running), adding a process automatically starts it.

      Each process has a name.  This name string must uniquely identify the
      process.  In particular, attempting to add two processes with the same
      name will result in a C{KeyError}.

      @type threshold: C{float}
      @ivar threshold: A process that dies within this many seconds of being
          started is considered to have died instantly.  The default value
          is 1 second.

      @type killTime: C{float}
      @ivar killTime: How long a process being killed has to get its affairs
          in order before it gets killed with an unmaskable signal.  The
          default value is 5 seconds.

      @type minRestartDelay: C{float}
      @ivar minRestartDelay: The minimum time (in seconds) to wait before
          attempting to restart a process.  Default 1s.

      @type maxRestartDelay: C{float}
      @ivar maxRestartDelay: The maximum time (in seconds) to wait before
          attempting to restart a process.  Default 3600s (1h).

      @type _reactor: L{IReactorProcess} provider
      @ivar _reactor: A provider of L{IReactorProcess} and L{IReactorTime}
          which will be used to spawn processes and register delayed calls.

      @type log: L{Logger}
      @ivar log: The logger used to propagate log messages from spawned
          processes.
      """

      threshold = 1
      killTime = 5
      minRestartDelay = 1
      maxRestartDelay = 3600
      log = Logger()

      def __init__(self, reactor=_reactor):
          """
          @param reactor: the reactor used to spawn processes, read the
              clock and schedule delayed calls.
          """
          self._reactor = reactor

          # All of the following are keyed by process name.
          self._processes = {}    # name -> _Process (launch parameters)
          self.protocols = {}     # name -> LoggingProtocol of a live process
          self.delay = {}         # name -> delay before the next restart
          self.timeStarted = {}   # name -> reactor time of the last start
          self.murder = {}        # name -> delayed call that sends KILL
          self.restart = {}       # name -> delayed call that restarts

      @deprecate.deprecatedProperty(incremental.Version('Twisted', 18, 7, 0))
      def processes(self):
          """
          Processes as dict of tuples

          @return: Dict of process name to monitored processes as tuples
          """
          return {name: process.toTuple()
                  for name, process in self._processes.items()}

      @deprecate.deprecated(incremental.Version('Twisted', 18, 7, 0))
      def __getstate__(self):
          """
          Build the pickled state: the reactor is dropped, all run-time
          bookkeeping is emptied, and the process list is stored in its
          legacy tuple form under C{'processes'}.
          """
          dct = service.Service.__getstate__(self)
          del dct['_reactor']
          dct['protocols'] = {}
          dct['delay'] = {}
          dct['timeStarted'] = {}
          dct['murder'] = {}
          dct['restart'] = {}
          del dct['_processes']
          dct['processes'] = self.processes
          return dct

      def addProcess(self, name, args, uid=None, gid=None, env={}, cwd=None):
          """
          Add a new monitored process and start it immediately if the
          L{ProcessMonitor} service is running.

          Note that args are passed to the system call, not to the shell.  If
          running the shell is desired, the common idiom is to use
          C{ProcessMonitor.addProcess("name", ['/bin/sh', '-c', shell_script])}

          @param name: A name for this process.  This value must be
              unique across all processes added to this monitor.
          @type name: C{str}

          @param args: The argv sequence for the process to launch.

          @param uid: The user ID to use to run the process.  If L{None},
              the current UID is used.
          @type uid: C{int}

          @param gid: The group ID to use to run the process.  If L{None},
              the current GID is used.
          @type gid: C{int}

          @param env: The environment to give to the launched process.  See
              L{IReactorProcess.spawnProcess}'s C{env} parameter.
          @type env: C{dict}

          @param cwd: The initial working directory of the launched process.
              The default of C{None} means inheriting the launching
              process's working directory.
          @type cwd: C{str}

          @raises: C{KeyError} if a process with the given name already
              exists
          """
          # NOTE: the ``env={}`` default is one dict shared by every call that
          # omits ``env``.  It is kept because the signature is public API and
          # ``None`` is a distinct value callers may pass through; this class
          # only stores the dict and forwards it, never mutating it.
          if name in self._processes:
              raise KeyError("remove %s first" % (name,))
          self._processes[name] = _Process(args, uid, gid, env, cwd)
          self.delay[name] = self.minRestartDelay
          if self.running:
              self.startProcess(name)

      def removeProcess(self, name):
          """
          Stop the named process and remove it from the list of monitored
          processes.

          @type name: C{str}
          @param name: A string that uniquely identifies the process.

          @raises: C{KeyError} if no process with the given name is being
              monitored
          """
          self.stopProcess(name)

          # The process may have died on its own and be waiting out its
          # backoff delay.  Cancel that restart: if it fired after the process
          # has been forgotten, startProcess() would fail with a KeyError
          # inside the reactor.
          pendingRestart = self.restart.get(name)
          if pendingRestart is not None and pendingRestart.active():
              pendingRestart.cancel()

          # self.delay[name] is deliberately left alone: connectionLost()
          # still reads it when a process that is being stopped finally exits.
          del self._processes[name]

      def startService(self):
          """
          Start all monitored processes.
          """
          service.Service.startService(self)
          for name in list(self._processes):
              self.startProcess(name)

      def stopService(self):
          """
          Stop all monitored processes and cancel all scheduled process
          restarts.
          """
          service.Service.stopService(self)

          # Cancel any outstanding restarts
          for delayedCall in list(self.restart.values()):
              if delayedCall.active():
                  delayedCall.cancel()

          for name in list(self._processes):
              self.stopProcess(name)

      def connectionLost(self, name):
          """
          Called when a monitored processes exits.  If
          L{service.IService.running} is L{True} (ie the service is started),
          the process will be restarted.

          If the process had been running for more than
          L{ProcessMonitor.threshold} seconds it will be restarted immediately.

          If the process had been running for less than
          L{ProcessMonitor.threshold} seconds, the restart will be delayed and
          each time the process dies before the configured threshold, the
          restart delay will be doubled - up to a maximum delay of
          maxRestartDelay sec.

          @type name: C{str}
          @param name: A string that uniquely identifies the process
              which exited.
          """
          # Cancel the scheduled _forceStopProcess function if the process
          # dies naturally
          if name in self.murder:
              if self.murder[name].active():
                  self.murder[name].cancel()
              del self.murder[name]

          del self.protocols[name]

          if self._reactor.seconds() - self.timeStarted[name] < self.threshold:
              # The process died too fast - backoff
              nextDelay = self.delay[name]
              self.delay[name] = min(self.delay[name] * 2, self.maxRestartDelay)
          else:
              # Process had been running for a significant amount of time
              # restart immediately
              nextDelay = 0
              self.delay[name] = self.minRestartDelay

          # Schedule a process restart if the service is running and the
          # process has not been removed in the meantime
          if self.running and name in self._processes:
              self.restart[name] = self._reactor.callLater(nextDelay,
                                                           self.startProcess,
                                                           name)

      def startProcess(self, name):
          """
          Spawn the named process unless it is already running.

          @param name: The name of the process to be started

          @raises: C{KeyError} if the process is not running and no process
              with the given name is being monitored
          """
          # If a protocol instance already exists, it means the process is
          # already running
          if name in self.protocols:
              return

          process = self._processes[name]

          proto = LoggingProtocol()
          proto.service = self
          proto.name = name
          self.protocols[name] = proto
          self.timeStarted[name] = self._reactor.seconds()
          self._reactor.spawnProcess(proto, process.args[0], process.args,
                                     uid=process.uid, gid=process.gid,
                                     env=process.env, path=process.cwd)

      def _forceStopProcess(self, proc):
          """
          Send C{KILL} to a process; a process that has already exited is
          silently ignored.

          @param proc: An L{IProcessTransport} provider
          """
          try:
              proc.signalProcess('KILL')
          except error.ProcessExitedAlready:
              pass

      def stopProcess(self, name):
          """
          Send C{TERM} to the named process, if it is running, and schedule a
          C{KILL} after L{killTime} seconds in case it does not exit.

          @param name: The name of the process to be stopped

          @raises: C{KeyError} if no process with the given name is being
              monitored
          """
          if name not in self._processes:
              raise KeyError('Unrecognized process name: %s' % (name,))

          proto = self.protocols.get(name, None)
          if proto is not None:
              proc = proto.transport
              try:
                  proc.signalProcess('TERM')
              except error.ProcessExitedAlready:
                  pass
              else:
                  # connectionLost() cancels this call if the process exits
                  # before it fires
                  self.murder[name] = self._reactor.callLater(
                      self.killTime,
                      self._forceStopProcess, proc)

      def restartAll(self):
          """
          Restart all processes.  This is useful for third party management
          services to allow a user to restart servers because of an outside
          change in circumstances -- for example, a new version of a library
          is installed.

          Each process is only stopped here; the restart itself is scheduled
          by L{connectionLost} when the process exits while the service is
          running.
          """
          # Iterate over a snapshot, as startService and stopService do.
          for name in list(self._processes):
              self.stopProcess(name)

      def __repr__(self):
          """
          @return: C{<ClassName 'name'(uid:gid): args ...>}, where the
              parenthesised part is omitted when neither uid nor gid is set.
          """
          entries = []
          for name, proc in self._processes.items():
              uidgid = ''
              if proc.uid is not None:
                  uidgid = str(proc.uid)
              if proc.gid is not None:
                  uidgid += ':' + str(proc.gid)

              if uidgid:
                  uidgid = '(' + uidgid + ')'
              entries.append('%r%s: %r' % (name, uidgid, proc.args))
          return ('<' + self.__class__.__name__ + ' '
                  + ' '.join(entries)
                  + '>')
  329. + '>')