# -*- test-case-name: twisted.logger.test.test_json -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
Tools for saving and loading log events in a structured format.
"""

import types
from constantly import NamedConstant
from json import dumps, loads
from uuid import UUID

from ._flatten import flattenEvent
from ._file import FileLogObserver
from ._levels import LogLevel
from ._logger import Logger
from twisted.python.compat import unicode, _PY3
from twisted.python.failure import Failure

log = Logger()


def failureAsJSON(failure):
    """
    Convert a failure to a JSON-serializable data structure.

    @param failure: A failure to serialize.
    @type failure: L{Failure}

    @return: a mapping of strings to ... stuff, mostly reminiscent of
        L{Failure.__getstate__}
    @rtype: L{dict}
    """
    return dict(
        failure.__getstate__(),
        type=dict(
            __module__=failure.type.__module__,
            __name__=failure.type.__name__,
        )
    )


def asBytes(obj):
    """
    On Python 2, we really need native strings in a variety of places;
    attribute names will sort of work in a __dict__, but they're subtly wrong;
    however, printing tracebacks relies on I/O to containers that only support
    bytes.  This function converts _all_ native strings within a
    JSON-deserialized object to bytes.

    @param obj: An object to convert to bytes.
    @type obj: L{object}

    @return: A string of UTF-8 bytes.
    @rtype: L{bytes}
    """
    if isinstance(obj, list):
        return map(asBytes, obj)
    elif isinstance(obj, dict):
        return dict((asBytes(k), asBytes(v)) for k, v in obj.items())
    elif isinstance(obj, unicode):
        return obj.encode("utf-8")
    else:
        return obj


def failureFromJSON(failureDict):
    """
    Load a L{Failure} from a dictionary deserialized from JSON.

    @param failureDict: a JSON-deserialized object like one previously
        returned by L{failureAsJSON}.
    @type failureDict: L{dict} mapping L{unicode} to attributes

    @return: L{Failure}
    @rtype: L{Failure}
    """
    # InstanceType() is only available in Python 2 and lower.
    # __new__ is only available on new-style classes.
    newFailure = getattr(Failure, "__new__", None)
    if newFailure is None:
        f = types.InstanceType(Failure)
    else:
        f = newFailure(Failure)

    if not _PY3:
        # Python 2 needs the failure dictionary as purely bytes, not text
        failureDict = asBytes(failureDict)

    typeInfo = failureDict["type"]
    failureDict["type"] = type(typeInfo["__name__"], (), typeInfo)
    f.__dict__ = failureDict
    return f


classInfo = [
    (
        lambda level: (
            isinstance(level, NamedConstant) and
            getattr(LogLevel, level.name, None) is level
        ),
        UUID("02E59486-F24D-46AD-8224-3ACDF2A5732A"),
        lambda level: dict(name=level.name),
        lambda level: getattr(LogLevel, level["name"], None)
    ),

    (
        lambda o: isinstance(o, Failure),
        UUID("E76887E2-20ED-49BF-A8F8-BA25CC586F2D"),
        failureAsJSON, failureFromJSON
    ),
]


uuidToLoader = dict([
    (uuid, loader) for (predicate, uuid, saver, loader) in classInfo
])


def objectLoadHook(aDict):
    """
    Dictionary-to-object-translation hook for certain value types used within
    the logging system.

    @see: the C{object_hook} parameter to L{json.load}

    @param aDict: A dictionary loaded from a JSON object.
    @type aDict: L{dict}

    @return: C{aDict} itself, or the object represented by C{aDict}
    @rtype: L{object}
    """
    if "__class_uuid__" in aDict:
        return uuidToLoader[UUID(aDict["__class_uuid__"])](aDict)
    return aDict


def objectSaveHook(pythonObject):
    """
    Object-to-serializable hook for certain value types used within the
    logging system.

    @see: the C{default} parameter to L{json.dump}

    @param pythonObject: Any object.
    @type pythonObject: L{object}

    @return: If the object is one of the special types the logging system
        supports, a specially-formatted dictionary; otherwise, a marker
        dictionary indicating that it could not be serialized.
    """
    for (predicate, uuid, saver, loader) in classInfo:
        if predicate(pythonObject):
            result = saver(pythonObject)
            result["__class_uuid__"] = str(uuid)
            return result
    return {"unpersistable": True}
def eventAsJSON(event):
    """
    Encode an event as JSON, flattening it if necessary to preserve as much
    structure as possible.

    Not all structure from the log event will be preserved when it is
    serialized.

    @param event: A log event dictionary.
    @type event: L{dict} with arbitrary keys and values

    @return: A string of the serialized JSON; note that this will contain no
        newline characters, and may thus safely be stored in a line-delimited
        file.
    @rtype: L{unicode}
    """
    if bytes is str:
        # Python 2: the stdlib json module accepts an "encoding" argument.
        kw = dict(default=objectSaveHook, encoding="charmap", skipkeys=True)
    else:
        def default(unencodable):
            """
            Serialize an object not otherwise serializable by L{dumps}.

            @param unencodable: An unencodable object.

            @return: C{unencodable}, serialized
            """
            if isinstance(unencodable, bytes):
                return unencodable.decode("charmap")
            return objectSaveHook(unencodable)

        kw = dict(default=default, skipkeys=True)

    flattenEvent(event)
    result = dumps(event, **kw)
    if not isinstance(result, unicode):
        return unicode(result, "utf-8", "replace")
    return result


def eventFromJSON(eventText):
    """
    Decode a log event from JSON.

    @param eventText: The output of a previous call to L{eventAsJSON}
    @type eventText: L{unicode}

    @return: A reconstructed version of the log event.
    @rtype: L{dict}
    """
    loaded = loads(eventText, object_hook=objectLoadHook)
    return loaded
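# A minimal round-trip sketch (illustrative names only, not part of the
# module): a captured Failure embedded in an event is serialized through
# objectSaveHook/failureAsJSON and reconstructed through
# objectLoadHook/failureFromJSON.
#
#     try:
#         1 / 0
#     except ZeroDivisionError:
#         failure = Failure()
#     serialized = eventAsJSON(dict(log_format=u"oops", log_failure=failure))
#     event = eventFromJSON(serialized)
#     # event["log_failure"] is a reconstructed Failure instance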
def jsonFileLogObserver(outFile, recordSeparator=u"\x1e"):
    """
    Create a L{FileLogObserver} that emits JSON-serialized events to a
    specified (writable) file-like object.

    Events are written in the following form::

        RS + JSON + NL

    C{JSON} is the serialized event, which is JSON text.  C{NL} is a newline
    (C{u"\\n"}).  C{RS} is a record separator.  By default, this is a single
    RS character (C{u"\\x1e"}), which makes the default output conform to the
    IETF draft document "draft-ietf-json-text-sequence-13".

    @param outFile: A file-like object.  Ideally one should be passed which
        accepts L{unicode} data.  Otherwise, UTF-8 L{bytes} will be used.
    @type outFile: L{io.IOBase}

    @param recordSeparator: The record separator to use.
    @type recordSeparator: L{unicode}

    @return: A file log observer.
    @rtype: L{FileLogObserver}
    """
    return FileLogObserver(
        outFile,
        lambda event: u"{0}{1}\n".format(recordSeparator, eventAsJSON(event))
    )
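# A minimal usage sketch (io.StringIO and the variable names are assumed for
# illustration; any writable text-mode file object works): attach the observer
# to a logger and emit one record.
#
#     import io
#     outFile = io.StringIO()
#     observer = jsonFileLogObserver(outFile)
#     Logger(namespace="example", observer=observer).info(
#         "hello, {name}", name="world"
#     )
#     # outFile.getvalue() is now u"\x1e" + JSON + u"\n"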
def eventsFromJSONLogFile(inFile, recordSeparator=None, bufferSize=4096):
    """
    Load events from a file previously saved with L{jsonFileLogObserver}.
    Event records that are truncated or otherwise unreadable are ignored.

    @param inFile: A (readable) file-like object.  Data read from C{inFile}
        should be L{unicode} or UTF-8 L{bytes}.
    @type inFile: iterable of lines

    @param recordSeparator: The expected record separator.
        If L{None}, attempt to automatically detect the record separator from
        one of C{u"\\x1e"} or C{u""}.
    @type recordSeparator: L{unicode}

    @param bufferSize: The size of the read buffer used while reading from
        C{inFile}.
    @type bufferSize: integer

    @return: Log events as read from C{inFile}.
    @rtype: iterable of L{dict}
    """
    def asBytes(s):
        # Normalize data read from inFile (and the record separator) to bytes
        # so that buffering and splitting work uniformly.
        if type(s) is bytes:
            return s
        else:
            return s.encode("utf-8")

    def eventFromBytearray(record):
        # Decode a raw record and parse it as JSON, logging and skipping
        # records that are not valid UTF-8 or not valid JSON.
        try:
            text = bytes(record).decode("utf-8")
        except UnicodeDecodeError:
            log.error(
                u"Unable to decode UTF-8 for JSON record: {record!r}",
                record=bytes(record)
            )
            return None

        try:
            return eventFromJSON(text)
        except ValueError:
            log.error(
                u"Unable to read JSON record: {record!r}",
                record=bytes(record)
            )
            return None

    if recordSeparator is None:
        first = asBytes(inFile.read(1))

        if first == b"\x1e":
            # This looks json-text-sequence compliant.
            recordSeparator = first
        else:
            # Default to simpler newline-separated stream, which does not use
            # a record separator.
            recordSeparator = b""
    else:
        recordSeparator = asBytes(recordSeparator)
        first = b""

    if recordSeparator == b"":
        recordSeparator = b"\n"  # Split on newlines below
        eventFromRecord = eventFromBytearray
    else:
        def eventFromRecord(record):
            # With an explicit record separator, a complete record must end
            # with a newline; anything else was truncated and is skipped.
            if record[-1] == ord("\n"):
                return eventFromBytearray(record)
            else:
                log.error(
                    u"Unable to read truncated JSON record: {record!r}",
                    record=bytes(record)
                )
            return None

    buffer = bytearray(first)

    while True:
        newData = inFile.read(bufferSize)

        if not newData:
            # End of input: flush whatever remains in the buffer.
            if len(buffer) > 0:
                event = eventFromRecord(buffer)
                if event is not None:
                    yield event
            break

        buffer += asBytes(newData)
        records = buffer.split(recordSeparator)

        for record in records[:-1]:
            if len(record) > 0:
                event = eventFromRecord(record)
                if event is not None:
                    yield event

        # The last piece may be an incomplete record; carry it over to the
        # next read.
        buffer = records[-1]
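# A minimal usage sketch (continuing the jsonFileLogObserver sketch above;
# variable names are illustrative): read the serialized events back.
#
#     inFile = io.StringIO(outFile.getvalue())
#     for event in eventsFromJSONLogFile(inFile):
#         print(event["log_namespace"], event["log_format"])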