# -*- test-case-name: twisted.logger.test.test_json -*-
# Copyright (c) Twisted Matrix Laboratories.
# See LICENSE for details.

"""
Tools for saving and loading log events in a structured format.
"""

import types

from constantly import NamedConstant
from json import dumps, loads
from uuid import UUID

from ._flatten import flattenEvent
from ._file import FileLogObserver
from ._levels import LogLevel
from ._logger import Logger

from twisted.python.compat import unicode, _PY3
from twisted.python.failure import Failure

log = Logger()



def failureAsJSON(failure):
    """
    Convert a failure to a JSON-serializable data structure.

    The result is a copy of the failure's C{__getstate__()} mapping in which
    the C{type} entry is replaced by a small dictionary naming the exception
    class (its C{__module__} and C{__name__}).

    @param failure: A failure to serialize.
    @type failure: L{Failure}

    @return: a mapping of strings to ... stuff, mostly reminiscent of
        L{Failure.__getstate__}
    @rtype: L{dict}
    """
    return dict(
        failure.__getstate__(),
        type=dict(
            __module__=failure.type.__module__,
            __name__=failure.type.__name__,
        )
    )



def asBytes(obj):
    """
    On Python 2, we really need native strings in a variety of places;
    attribute names will sort of work in a __dict__, but they're subtly wrong;
    however, printing tracebacks relies on I/O to containers that only support
    bytes.  This function converts _all_ native strings within a
    JSON-deserialized object to bytes.

    Lists and dictionaries are converted recursively (dictionary keys
    included); any other non-text value is returned unchanged.

    @param obj: An object to convert to bytes.
    @type obj: L{object}

    @return: A structure equivalent to C{obj} in which every text string has
        been replaced by its UTF-8 encoding.
    @rtype: L{object}
    """
    if isinstance(obj, list):
        # Build a real list rather than relying on map(), which only returns
        # a list on Python 2.
        return [asBytes(item) for item in obj]
    elif isinstance(obj, dict):
        return dict((asBytes(k), asBytes(v)) for k, v in obj.items())
    elif isinstance(obj, unicode):
        return obj.encode("utf-8")
    else:
        return obj



def failureFromJSON(failureDict):
    """
    Load a L{Failure} from a dictionary deserialized from JSON.

    On Python 3 the given dictionary is modified in place (its C{"type"} entry
    is replaced by a synthesized class) and becomes the C{__dict__} of the
    returned failure.

    @param failureDict: a JSON-deserialized object like one previously
        returned by L{failureAsJSON}.
    @type failureDict: L{dict} mapping L{unicode} to attributes

    @return: L{Failure}
    @rtype: L{Failure}

    @raise KeyError: if C{failureDict} has no C{"type"} entry, or that entry
        has no C{"__name__"}.
    """
    # InstanceType() is only available in Python 2 and lower.
    # __new__ is only available on new-style classes.
    newFailure = getattr(Failure, "__new__", None)
    if newFailure is None:
        f = types.InstanceType(Failure)
    else:
        f = newFailure(Failure)

    if not _PY3:
        # Python 2 needs the failure dictionary as purely bytes, not text
        failureDict = asBytes(failureDict)

    # The original exception class is not importable from JSON, so synthesize
    # a stand-in class carrying the recorded __module__ and __name__.
    typeInfo = failureDict["type"]
    failureDict["type"] = type(typeInfo["__name__"], (), typeInfo)
    f.__dict__ = failureDict
    return f



# Registry of specially-handled value types.  Each entry is a 4-tuple of:
#   (predicate, class UUID, saver, loader)
# where predicate(obj) says whether the entry applies to obj, saver(obj)
# returns a JSON-serializable dict, and loader(dict) rebuilds the object.
classInfo = [
    (
        lambda level: (
            isinstance(level, NamedConstant) and
            getattr(LogLevel, level.name, None) is level
        ),
        UUID("02E59486-F24D-46AD-8224-3ACDF2A5732A"),
        lambda level: dict(name=level.name),
        lambda level: getattr(LogLevel, level["name"], None)
    ),

    (
        lambda o: isinstance(o, Failure),
        UUID("E76887E2-20ED-49BF-A8F8-BA25CC586F2D"),
        failureAsJSON, failureFromJSON
    ),
]



# Maps each registered class UUID to the loader that rebuilds its object.
uuidToLoader = dict([
    (uuid, loader) for (predicate, uuid, saver, loader) in classInfo
])



def objectLoadHook(aDict):
    """
    Dictionary-to-object-translation hook for certain value types used within
    the logging system.

    @see: the C{object_hook} parameter to L{json.load}

    @param aDict: A dictionary loaded from a JSON object.
    @type aDict: L{dict}

    @return: C{aDict} itself, or the object represented by C{aDict}
    @rtype: L{object}

    @raise KeyError: if C{aDict} carries a C{"__class_uuid__"} that is a valid
        UUID but is not registered in C{uuidToLoader}.
    @raise ValueError: if the C{"__class_uuid__"} value is a malformed UUID
        string.
    """
    if "__class_uuid__" in aDict:
        return uuidToLoader[UUID(aDict["__class_uuid__"])](aDict)
    return aDict



def objectSaveHook(pythonObject):
    """
    Object-to-serializable hook for certain value types used within the
    logging system.

    @see: the C{default} parameter to L{json.dump}

    @param pythonObject: Any object.
    @type pythonObject: L{object}

    @return: If the object is one of the special types the logging system
        supports, a specially-formatted dictionary; otherwise, a marker
        dictionary indicating that it could not be serialized.
    @rtype: L{dict}
    """
    for (predicate, uuid, saver, loader) in classInfo:
        if predicate(pythonObject):
            result = saver(pythonObject)
            # Tag the payload so that objectLoadHook can find the loader.
            result["__class_uuid__"] = str(uuid)
            return result
    return {"unpersistable": True}



def eventAsJSON(event):
    """
    Encode an event as JSON, flattening it if necessary to preserve as much
    structure as possible.

    Not all structure from the log event will be preserved when it is
    serialized.  Note that C{event} is passed to L{flattenEvent} before being
    serialized.

    @param event: A log event dictionary.
    @type event: L{dict} with arbitrary keys and values

    @return: A string of the serialized JSON; note that this will contain no
        newline characters, and may thus safely be stored in a line-delimited
        file.
    @rtype: L{unicode}
    """
    if bytes is str:
        # Python 2: byte strings are native strings; have the encoder
        # interpret them as "charmap" so every byte value is accepted.
        kw = dict(default=objectSaveHook, encoding="charmap", skipkeys=True)
    else:
        def default(unencodable):
            """
            Serialize an object not otherwise serializable by L{dumps}.

            @param unencodable: An unencodable object.

            @return: C{unencodable}, serialized
            """
            if isinstance(unencodable, bytes):
                return unencodable.decode("charmap")
            return objectSaveHook(unencodable)

        kw = dict(default=default, skipkeys=True)

    flattenEvent(event)
    result = dumps(event, **kw)
    if not isinstance(result, unicode):
        return unicode(result, "utf-8", "replace")
    return result



def eventFromJSON(eventText):
    """
    Decode a log event from JSON.

    @param eventText: The output of a previous call to L{eventAsJSON}
    @type eventText: L{unicode}

    @return: A reconstructed version of the log event.
    @rtype: L{dict}
    """
    loaded = loads(eventText, object_hook=objectLoadHook)
    return loaded



def jsonFileLogObserver(outFile, recordSeparator=u"\x1e"):
    """
    Create a L{FileLogObserver} that emits JSON-serialized events to a
    specified (writable) file-like object.

    Events are written in the following form::

        RS + JSON + NL

    C{JSON} is the serialized event, which is JSON text.  C{NL} is a newline
    (C{u"\\n"}).  C{RS} is a record separator.  By default, this is a single
    RS character (C{u"\\x1e"}), which makes the default output conform to the
    IETF draft document "draft-ietf-json-text-sequence-13".

    @param outFile: A file-like object.  Ideally one should be passed which
        accepts L{unicode} data.  Otherwise, UTF-8 L{bytes} will be used.
    @type outFile: L{io.IOBase}

    @param recordSeparator: The record separator to use.
    @type recordSeparator: L{unicode}

    @return: A file log observer.
    @rtype: L{FileLogObserver}
    """
    return FileLogObserver(
        outFile,
        lambda event: u"{0}{1}\n".format(recordSeparator, eventAsJSON(event))
    )



def eventsFromJSONLogFile(inFile, recordSeparator=None, bufferSize=4096):
    """
    Load events from a file previously saved with L{jsonFileLogObserver}.
    Event records that are truncated or otherwise unreadable are ignored
    (an error is logged for each and reading continues).

    @param inFile: A (readable) file-like object.  Data read from C{inFile}
        should be L{unicode} or UTF-8 L{bytes}.
    @type inFile: iterable of lines

    @param recordSeparator: The expected record separator.
        If L{None}, attempt to automatically detect the record separator from
        one of C{u"\\x1e"} or C{u""}.
    @type recordSeparator: L{unicode}

    @param bufferSize: The size of the read buffer used while reading from
        C{inFile}.
    @type bufferSize: integer

    @return: Log events as read from C{inFile}.
    @rtype: iterable of L{dict}
    """
    def asBytes(s):
        # Normalize whatever inFile.read() produced to bytes.  isinstance()
        # keeps bytes subclasses on the pass-through path instead of trying
        # to call .encode() on them.
        if isinstance(s, bytes):
            return s
        else:
            return s.encode("utf-8")

    def eventFromBytearray(record):
        # Decode one raw record into an event, or log and return None.
        try:
            text = bytes(record).decode("utf-8")
        except UnicodeDecodeError:
            log.error(
                u"Unable to decode UTF-8 for JSON record: {record!r}",
                record=bytes(record)
            )
            return None

        try:
            return eventFromJSON(text)
        except (ValueError, KeyError):
            # ValueError: malformed JSON or malformed class UUID.
            # KeyError: objectLoadHook met an unregistered class UUID, or a
            # serialized failure lacked its "type" entry.  Either way the
            # record is unreadable and must be skipped, not abort the read.
            log.error(
                u"Unable to read JSON record: {record!r}",
                record=bytes(record)
            )
            return None

    if recordSeparator is None:
        # Auto-detect: peek at the first character of the stream.
        first = asBytes(inFile.read(1))

        if first == b"\x1e":
            # This looks json-text-sequence compliant.
            recordSeparator = first
        else:
            # Default to simpler newline-separated stream, which does not use
            # a record separator.
            recordSeparator = b""

    else:
        recordSeparator = asBytes(recordSeparator)
        first = b""

    if recordSeparator == b"":
        recordSeparator = b"\n"  # Split on newlines below

        eventFromRecord = eventFromBytearray

    else:
        def eventFromRecord(record):
            # With an explicit record separator, every complete record ends
            # in a newline; anything else was cut short.
            if record[-1] == ord("\n"):
                return eventFromBytearray(record)
            else:
                log.error(
                    u"Unable to read truncated JSON record: {record!r}",
                    record=bytes(record)
                )
                return None

    # Start with the byte consumed during auto-detection (if any) so that it
    # is not lost from the first record.
    buffer = bytearray(first)

    while True:
        newData = inFile.read(bufferSize)

        if not newData:
            # End of input: whatever remains is the final record.
            if len(buffer) > 0:
                event = eventFromRecord(buffer)
                if event is not None:
                    yield event
            break

        buffer += asBytes(newData)
        records = buffer.split(recordSeparator)

        # Everything but the last piece is a complete record; the last piece
        # may be partial and is carried over to the next read.
        for record in records[:-1]:
            if len(record) > 0:
                event = eventFromRecord(record)
                if event is not None:
                    yield event

        buffer = records[-1]