- # -*- test-case-name: twisted.logger.test.test_json -*-
- # Copyright (c) Twisted Matrix Laboratories.
- # See LICENSE for details.
- """
- Tools for saving and loading log events in a structured format.
- """
- import types
- from constantly import NamedConstant
- from json import dumps, loads
- from uuid import UUID
- from ._flatten import flattenEvent
- from ._file import FileLogObserver
- from ._levels import LogLevel
- from ._logger import Logger
- from twisted.python.compat import unicode, _PY3
- from twisted.python.failure import Failure
- log = Logger()
- def failureAsJSON(failure):
-     """
-     Convert a failure to a JSON-serializable data structure.
-     @param failure: A failure to serialize.
-     @type failure: L{Failure}
-     @return: A JSON-serializable mapping of strings to values, mostly
-         reminiscent of the result of L{Failure.__getstate__}.
-     @rtype: L{dict}
-     """
-     return dict(
-         failure.__getstate__(),
-         type=dict(
-             __module__=failure.type.__module__,
-             __name__=failure.type.__name__,
-         )
-     )
- def asBytes(obj):
-     """
-     On Python 2, native strings are needed in a variety of places: attribute
-     names in a C{__dict__} will sort of work as text, but they're subtly
-     wrong, and printing tracebacks relies on I/O to containers that only
-     support bytes.  This function recursively converts all text strings
-     within a JSON-deserialized object to UTF-8-encoded L{bytes}.
-     @param obj: An object to convert to bytes.
-     @type obj: L{object}
-     @return: A copy of C{obj} with all text strings encoded as UTF-8
-         L{bytes}.
-     @rtype: L{object}
-     """
-     if isinstance(obj, list):
-         return map(asBytes, obj)
-     elif isinstance(obj, dict):
-         return dict((asBytes(k), asBytes(v)) for k, v in obj.items())
-     elif isinstance(obj, unicode):
-         return obj.encode("utf-8")
-     else:
-         return obj
- def failureFromJSON(failureDict):
-     """
-     Load a L{Failure} from a dictionary deserialized from JSON.
-     @param failureDict: a JSON-deserialized object like one previously
-         returned by L{failureAsJSON}.
-     @type failureDict: L{dict} mapping L{unicode} to attributes
-     @return: L{Failure}
-     @rtype: L{Failure}
-     """
-     # InstanceType() is only available in Python 2 and lower.
-     # __new__ is only available on new-style classes.
-     newFailure = getattr(Failure, "__new__", None)
-     if newFailure is None:
-         f = types.InstanceType(Failure)
-     else:
-         f = newFailure(Failure)
-     if not _PY3:
-         # Python 2 needs the failure dictionary as purely bytes, not text
-         failureDict = asBytes(failureDict)
-     typeInfo = failureDict["type"]
-     failureDict["type"] = type(typeInfo["__name__"], (), typeInfo)
-     f.__dict__ = failureDict
-     return f
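- # Example (an illustrative sketch, assuming Python 3; names such as
- # "originalFailure" are arbitrary): round-trip a Failure through JSON using
- # the two helpers above together with the save hook defined further below.
- #
- #     try:
- #         1 / 0
- #     except ZeroDivisionError:
- #         originalFailure = Failure()
- #     text = dumps(failureAsJSON(originalFailure), default=objectSaveHook)
- #     restored = failureFromJSON(loads(text))
- #     restored.type.__name__   # "ZeroDivisionError" -- note the type is a
- #                              # synthesized placeholder class, not the
- #                              # original exception class object.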
- # (predicate, class UUID, saver, loader) tuples describing the types that
- # receive special treatment when log events are (de)serialized.
- classInfo = [
-     (
-         lambda level: (
-             isinstance(level, NamedConstant) and
-             getattr(LogLevel, level.name, None) is level
-         ),
-         UUID("02E59486-F24D-46AD-8224-3ACDF2A5732A"),
-         lambda level: dict(name=level.name),
-         lambda level: getattr(LogLevel, level["name"], None)
-     ),
-     (
-         lambda o: isinstance(o, Failure),
-         UUID("E76887E2-20ED-49BF-A8F8-BA25CC586F2D"),
-         failureAsJSON, failureFromJSON
-     ),
- ]
- uuidToLoader = dict([
-     (uuid, loader) for (predicate, uuid, saver, loader) in classInfo
- ])
- def objectLoadHook(aDict):
-     """
-     Dictionary-to-object-translation hook for certain value types used within
-     the logging system.
-     @see: the C{object_hook} parameter to L{json.load}
-     @param aDict: A dictionary loaded from a JSON object.
-     @type aDict: L{dict}
-     @return: C{aDict} itself, or the object represented by C{aDict}
-     @rtype: L{object}
-     """
-     if "__class_uuid__" in aDict:
-         return uuidToLoader[UUID(aDict["__class_uuid__"])](aDict)
-     return aDict
- def objectSaveHook(pythonObject):
-     """
-     Object-to-serializable hook for certain value types used within the
-     logging system.
-     @see: the C{default} parameter to L{json.dump}
-     @param pythonObject: Any object.
-     @type pythonObject: L{object}
-     @return: If the object is one of the special types the logging system
-         supports, a specially-formatted dictionary; otherwise, a marker
-         dictionary indicating that it could not be serialized.
-     """
-     for (predicate, uuid, saver, loader) in classInfo:
-         if predicate(pythonObject):
-             result = saver(pythonObject)
-             result["__class_uuid__"] = str(uuid)
-             return result
-     return {"unpersistable": True}
- def eventAsJSON(event):
-     """
-     Encode an event as JSON, flattening it if necessary to preserve as much
-     structure as possible.
-     Not all structure from the log event will be preserved when it is
-     serialized.
-     @param event: A log event dictionary.
-     @type event: L{dict} with arbitrary keys and values
-     @return: A string of the serialized JSON; note that this will contain no
-         newline characters, and may thus safely be stored in a line-delimited
-         file.
-     @rtype: L{unicode}
-     """
-     if bytes is str:
-         # Python 2: bytes values are decoded with the "charmap" codec during
-         # serialization, so arbitrary byte values survive encoding.
-         kw = dict(default=objectSaveHook, encoding="charmap", skipkeys=True)
-     else:
-         def default(unencodable):
-             """
-             Serialize an object not otherwise serializable by L{dumps}.
-             @param unencodable: An unencodable object.
-             @return: C{unencodable}, serialized
-             """
-             if isinstance(unencodable, bytes):
-                 return unencodable.decode("charmap")
-             return objectSaveHook(unencodable)
-         kw = dict(default=default, skipkeys=True)
-     flattenEvent(event)
-     result = dumps(event, **kw)
-     if not isinstance(result, unicode):
-         return unicode(result, "utf-8", "replace")
-     return result
- def eventFromJSON(eventText):
-     """
-     Decode a log event from JSON.
-     @param eventText: The output of a previous call to L{eventAsJSON}
-     @type eventText: L{unicode}
-     @return: A reconstructed version of the log event.
-     @rtype: L{dict}
-     """
-     loaded = loads(eventText, object_hook=objectLoadHook)
-     return loaded
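- # Example (an illustrative sketch; the event contents are made up): an event
- # dictionary survives a round trip through eventAsJSON/eventFromJSON, with
- # registered types such as log levels restored as the original constants.
- #
- #     event = dict(log_format=u"Hello, {who}!", who=u"world",
- #                  log_level=LogLevel.info)
- #     text = eventAsJSON(event)        # a single line of JSON text
- #     restored = eventFromJSON(text)
- #     restored["who"] == u"world"              # True
- #     restored["log_level"] is LogLevel.info   # True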
- def jsonFileLogObserver(outFile, recordSeparator=u"\x1e"):
-     """
-     Create a L{FileLogObserver} that emits JSON-serialized events to a
-     specified (writable) file-like object.
-     Events are written in the following form::
-         RS + JSON + NL
-     C{JSON} is the serialized event, which is JSON text.  C{NL} is a newline
-     (C{u"\\n"}).  C{RS} is a record separator.  By default, this is a single
-     RS character (C{u"\\x1e"}), which makes the default output conform to the
-     IETF draft document "draft-ietf-json-text-sequence-13" (since published
-     as RFC 7464).
-     @param outFile: A file-like object.  Ideally one should be passed which
-         accepts L{unicode} data.  Otherwise, UTF-8 L{bytes} will be used.
-     @type outFile: L{io.IOBase}
-     @param recordSeparator: The record separator to use.
-     @type recordSeparator: L{unicode}
-     @return: A file log observer.
-     @rtype: L{FileLogObserver}
-     """
-     return FileLogObserver(
-         outFile,
-         lambda event: u"{0}{1}\n".format(recordSeparator, eventAsJSON(event))
-     )
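- # Example (an illustrative sketch, assuming Python 3; "output" and
- # "exampleLog" are arbitrary names): attach the observer to a L{Logger} so
- # that each emitted event becomes one RS-prefixed, newline-terminated JSON
- # record in the underlying file.
- #
- #     import io
- #     output = io.StringIO()
- #     exampleLog = Logger(observer=jsonFileLogObserver(output),
- #                         namespace="example")
- #     exampleLog.info("Hello, {name}!", name="Bob")
- #     output.getvalue()   # one record: u"\x1e" + JSON text + u"\n"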
- def eventsFromJSONLogFile(inFile, recordSeparator=None, bufferSize=4096):
-     """
-     Load events from a file previously saved with L{jsonFileLogObserver}.
-     Event records that are truncated or otherwise unreadable are ignored.
-     @param inFile: A (readable) file-like object.  Data read from C{inFile}
-         should be L{unicode} or UTF-8 L{bytes}.
-     @type inFile: iterable of lines
-     @param recordSeparator: The expected record separator.
-         If L{None}, attempt to automatically detect the record separator from
-         one of C{u"\\x1e"} or C{u""}.
-     @type recordSeparator: L{unicode}
-     @param bufferSize: The size of the read buffer used while reading from
-         C{inFile}.
-     @type bufferSize: integer
-     @return: Log events as read from C{inFile}.
-     @rtype: iterable of L{dict}
-     """
-     def asBytes(s):
-         if type(s) is bytes:
-             return s
-         else:
-             return s.encode("utf-8")
-     def eventFromBytearray(record):
-         try:
-             text = bytes(record).decode("utf-8")
-         except UnicodeDecodeError:
-             log.error(
-                 u"Unable to decode UTF-8 for JSON record: {record!r}",
-                 record=bytes(record)
-             )
-             return None
-         try:
-             return eventFromJSON(text)
-         except ValueError:
-             log.error(
-                 u"Unable to read JSON record: {record!r}",
-                 record=bytes(record)
-             )
-             return None
-     if recordSeparator is None:
-         first = asBytes(inFile.read(1))
-         if first == b"\x1e":
-             # This looks json-text-sequence compliant.
-             recordSeparator = first
-         else:
-             # Default to simpler newline-separated stream, which does not use
-             # a record separator.
-             recordSeparator = b""
-     else:
-         recordSeparator = asBytes(recordSeparator)
-         first = b""
-     if recordSeparator == b"":
-         recordSeparator = b"\n"  # Split on newlines below
-         eventFromRecord = eventFromBytearray
-     else:
-         def eventFromRecord(record):
-             if record[-1] == ord("\n"):
-                 return eventFromBytearray(record)
-             else:
-                 log.error(
-                     u"Unable to read truncated JSON record: {record!r}",
-                     record=bytes(record)
-                 )
-                 return None
-     buffer = bytearray(first)
-     while True:
-         newData = inFile.read(bufferSize)
-         if not newData:
-             if len(buffer) > 0:
-                 event = eventFromRecord(buffer)
-                 if event is not None:
-                     yield event
-             break
-         buffer += asBytes(newData)
-         records = buffer.split(recordSeparator)
-         for record in records[:-1]:
-             if len(record) > 0:
-                 event = eventFromRecord(record)
-                 if event is not None:
-                     yield event
-         buffer = records[-1]
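- # Example (an illustrative sketch, assuming Python 3; "output" and
- # "observer" are arbitrary names): write one event with the observer above,
- # then read it back out of the same file-like object.
- #
- #     import io
- #     output = io.StringIO()
- #     observer = jsonFileLogObserver(output)
- #     observer(dict(log_format=u"Hello!", log_level=LogLevel.info))
- #     for event in eventsFromJSONLogFile(io.StringIO(output.getvalue())):
- #         assert event["log_level"] is LogLevel.info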