Browse Source

import tracing

* add argument to parser
* add out_path as fn parameter
* set necessary env variables for import tracing
kuzmich321 1 year ago
parent
commit
27c5889c53

+ 18 - 0
build/conf/python.conf

@@ -139,6 +139,9 @@ when ($OPENSOURCE) {
 PYTHON2=no
 PYTHON3=no
 
+# tag:python-specific
+PYTHON_IMPORT_TRACING=yes
+
 # tag:python-specific
 when (!$ARCADIA_PYTHON_UNICODE_SIZE) {
     when ($OS_WINDOWS) {
@@ -601,6 +604,10 @@ module _BASE_PY_PROGRAM: _BASE_PROGRAM {
         PEERDIR+=library/python/coverage
     }
 
+    when ($PYTHON_IMPORT_TRACING == "yes") {
+        PEERDIR += library/python/import_tracing/constructor
+    }
+
     when ($ARCH_PPC64LE == "yes") {
         _MY_ALLOCATOR=SYSTEM
     }
@@ -675,6 +682,9 @@ module _BASE_PY3_PROGRAM: _BASE_PROGRAM {
     when ($PYTHON_COVERAGE == "yes") {
         PEERDIR+=library/python/coverage
     }
+    when ($PYTHON_IMPORT_TRACING == "yes") {
+        PEERDIR += library/python/import_tracing/constructor
+    }
     when ($CODENAVIGATION && $NOCODENAVIGATION != "yes")  {
         PEERDIR += contrib/python/six
     }
@@ -846,6 +856,14 @@ macro NO_PYTHON_COVERAGE() {
     DISABLE(PYTHON_COVERAGE)
 }
 
+# tag:python-specific tag:import_tracing
+### @usage: NO_IMPORT_TRACING()
+###
+### Disable Python import tracing for the module
+macro NO_IMPORT_TRACING() {
+    DISABLE(PYTHON_IMPORT_TRACING)
+}
+
 # tag:python-specific tag:coverage tag:cython
 ### @usage: NO_CYTHON_COVERAGE()
 ###

+ 10 - 0
library/python/import_tracing/constructor/__init__.py

@@ -0,0 +1,10 @@
+import os
+
+
+def init():
+    """Turn on import tracing when ``Y_PYTHON_TRACE_FILE`` is present in the environment.
+
+    The variable's value is passed to ``regulator.enable`` as the output file
+    mask, and ``regulator.disable`` is registered to run at interpreter exit.
+    When the variable is absent nothing is imported and nothing is changed.
+    """
+    if "Y_PYTHON_TRACE_FILE" not in os.environ:
+        return
+
+    # Deferred imports: the tracing machinery is loaded only when requested.
+    import atexit
+    import library.python.import_tracing.lib.regulator as regulator
+
+    trace_filemask = os.getenv("Y_PYTHON_TRACE_FILE")
+    regulator.enable(trace_filemask)
+    atexit.register(regulator.disable)

+ 15 - 0
library/python/import_tracing/constructor/ya.make

@@ -0,0 +1,15 @@
+PY23_LIBRARY()
+
+STYLE_PYTHON()
+
+PY_CONSTRUCTOR(library.python.import_tracing.constructor)
+
+PY_SRCS(
+    __init__.py
+)
+
+PEERDIR(
+    library/python/import_tracing/lib
+)
+
+END()

+ 1 - 0
library/python/import_tracing/lib/constants.py

@@ -0,0 +1 @@
+# Number of microseconds in one second (conversion factor between the two units).
+MCS_IN_SEC = 1e6

+ 3 - 0
library/python/import_tracing/lib/converters/base.py

@@ -0,0 +1,3 @@
+class BaseTraceConverter:
+    """Interface for objects that write a collection of import events to a file."""
+
+    def dump(self, events, filepath):
+        """Write ``events`` to ``filepath``.
+
+        The base implementation always raises ``NotImplementedError``;
+        concrete converters are expected to override it.
+        """
+        raise NotImplementedError()

+ 33 - 0
library/python/import_tracing/lib/converters/chrometrace.py

@@ -0,0 +1,33 @@
+import json
+import os
+
+import library.python.import_tracing.lib.converters.base as base_converter
+
+
+class ChromiumTraceConverter(base_converter.BaseTraceConverter):
+    """Converter that writes events as one JSON array of begin/end trace records."""
+
+    @staticmethod
+    def _yield_in_chrome_trace_format(events, pid):
+        """Yield a begin ("B") record and then an end ("E") record for every event.
+
+        ``pid`` is stamped on every record; the thread id, module name and
+        file name are taken from the event itself.
+        """
+        for event in events:
+            for phase, time_attr in (("B", "start_time"), ("E", "end_time")):
+                record = {
+                    "cat": event.modname,
+                    "name": event.filename,
+                    "ph": phase,
+                    "ts": getattr(event, time_attr),
+                    "pid": pid,
+                    "tid": event.tid,
+                }
+                if phase == "B":
+                    # Only the opening record carries an (empty) "args" mapping.
+                    record["args"] = {}
+                yield record
+
+    def dump(self, events, filepath):
+        """Serialize ``events`` into ``filepath`` as a single JSON document."""
+        current_pid = os.getpid()
+        with open(filepath, "w") as output:
+            records = tuple(self._yield_in_chrome_trace_format(events, current_pid))
+            output.write(json.dumps(records))

+ 56 - 0
library/python/import_tracing/lib/converters/raw.py

@@ -0,0 +1,56 @@
+import library.python.import_tracing.lib.converters.base as base_converter
+import library.python.import_tracing.lib.constants as constants
+
+
+class RawTextTraceConverter(base_converter.BaseTraceConverter):
+    """Converter that writes events as a plain-text table, slowest import first."""
+
+    @staticmethod
+    def _format_duration(event):
+        """Return the event's duration converted to seconds, with six decimals."""
+        return format((event.end_time - event.start_time) / constants.MCS_IN_SEC, ".6f")
+
+    @staticmethod
+    def _get_columns_length(events):
+        """Return ``(cumtime_width, filename_width, max_end_time)`` for ``events``.
+
+        Widths are measured on the text that is actually printed (the
+        formatted seconds and the column headers) so that all rows line up.
+        ``events`` is iterated once; every event must have ``end_time`` set.
+        """
+        max_cumtime = len("cumtime")
+        max_filename = len("filename")
+        max_end_time = 0
+
+        for event in events:
+            max_cumtime = max(max_cumtime, len(RawTextTraceConverter._format_duration(event)))
+            max_filename = max(max_filename, len(event.filename))
+            max_end_time = max(max_end_time, event.end_time)
+
+        return max_cumtime, max_filename, max_end_time
+
+    @staticmethod
+    def _get_sorted_events(events):
+        """Return ``events`` as a list ordered by duration, longest first."""
+        return sorted(events, key=lambda event: event.end_time - event.start_time, reverse=True)
+
+    @staticmethod
+    def _format_line(cumtime, filename, max_cumtime, max_filename):
+        """Return one tab-separated, left-aligned table row terminated by a newline."""
+        return "{0:<{max_cumtime}}\t{1:<{max_filename}}\n".format(
+            cumtime,
+            filename,
+            max_cumtime=max_cumtime,
+            max_filename=max_filename,
+        )
+
+    def dump(self, events, filepath):
+        """Write a duration-sorted text report of ``events`` to ``filepath``.
+
+        ``events`` may be any iterable, including a one-shot generator.
+        Events whose ``end_time`` is ``None`` (imports that never finished)
+        have no duration and are left out of the report.
+        """
+        # Materialize first: the events are traversed twice below (once for the
+        # column widths, once for the rows), which would silently produce an
+        # empty table if a generator were consumed by the first pass.
+        finished_events = [event for event in events if event.end_time is not None]
+
+        max_cumtime, max_filename, max_end_time = self._get_columns_length(finished_events)
+        max_line_length = max_cumtime + max_filename
+
+        with open(filepath, "w") as file:
+            # total time taken
+            file.write("total time taken (seconds): {0:.4f}\n".format(max_end_time / constants.MCS_IN_SEC))
+            file.write("-" * max_line_length + "\n")
+
+            # header
+            file.write(self._format_line("cumtime", "filename", max_cumtime, max_filename))
+            file.write("-" * max_line_length + "\n")
+
+            # trace info
+            for event in self._get_sorted_events(finished_events):
+                file.write(
+                    self._format_line(
+                        self._format_duration(event),
+                        event.filename,
+                        max_cumtime,
+                        max_filename,
+                    )
+                )

+ 9 - 0
library/python/import_tracing/lib/event.py

@@ -0,0 +1,9 @@
+class Event:
+    """Record of one traced import: module name, file name, thread id and two timestamps."""
+
+    __slots__ = ("modname", "filename", "tid", "start_time", "end_time")
+
+    def __init__(self, modname, filename, tid=None, start_time=None, end_time=None):
+        # What was imported and on which thread.
+        self.modname, self.filename, self.tid = modname, filename, tid
+        # Timestamps; both default to None when not supplied.
+        self.start_time, self.end_time = start_time, end_time

+ 52 - 0
library/python/import_tracing/lib/import_tracer.py

@@ -0,0 +1,52 @@
+import threading
+import time
+import collections
+import library.python.import_tracing.lib.event as events
+import library.python.import_tracing.lib.constants as constants
+
+
+class ImportTracer:
+    """Collects start/end timestamps of imports, keyed by ``(modname, thread id)``.
+
+    All timestamps are offsets from the moment the tracer was created,
+    expressed in microseconds.
+    """
+
+    def __init__(self):
+        # Ordered mapping so events are reported in the order their keys were first added.
+        self.events = collections.OrderedDict()
+        # Reference point for every timestamp produced by this tracer.
+        self.start_time = time.time()
+
+    def start_event(self, modname, filename, tid=None):
+        """Record that the import of ``modname`` from ``filename`` has started.
+
+        ``tid`` defaults to the current thread's identifier. A previous event
+        stored under the same ``(modname, tid)`` key is replaced.
+        """
+        tid = tid if tid is not None else threading.current_thread().ident
+        time_from_start = self._get_current_time_from_start()
+
+        event_key = (modname, tid)
+        new_event = events.Event(
+            modname=modname,
+            filename=filename,
+            tid=tid,
+            start_time=time_from_start,
+            end_time=None,
+        )
+
+        self.events[event_key] = new_event
+
+    def finish_event(self, modname, filename, tid=None):
+        """Record that the import of ``modname`` has finished.
+
+        ``tid`` defaults to the current thread's identifier. When no event was
+        started for ``(modname, tid)`` the call is a no-op instead of raising
+        ``KeyError``, so an unmatched notification cannot break the caller.
+        """
+        tid = tid if tid is not None else threading.current_thread().ident
+        event = self.events.get((modname, tid))
+        if event is None:
+            # No matching start_event (e.g. the import was already in progress
+            # when tracing was switched on): there is nothing to close.
+            return
+
+        event.end_time = self._get_current_time_from_start()
+
+    def get_events(self, close_not_finished=False):
+        """Yield the recorded events in insertion order.
+
+        When ``close_not_finished`` is true, an event without ``end_time`` is
+        yielded as a copy whose ``end_time`` is the moment iteration began;
+        the stored event is left untouched. Otherwise events are yielded as
+        stored, possibly with ``end_time`` equal to ``None``.
+        """
+        end_time = self._get_current_time_from_start()
+
+        for event in self.events.values():
+            if close_not_finished and event.end_time is None:
+                yield events.Event(
+                    modname=event.modname,
+                    filename=event.filename,
+                    tid=event.tid,
+                    start_time=event.start_time,
+                    end_time=end_time,
+                )
+            else:
+                yield event
+
+    def _get_current_time_from_start(self):
+        """Return the time elapsed since the tracer was created, in microseconds."""
+        return (time.time() - self.start_time) * constants.MCS_IN_SEC

+ 73 - 0
library/python/import_tracing/lib/regulator.py

@@ -0,0 +1,73 @@
+import collections
+import os
+
+# Bundle describing one tracing session: the tracer, the output converter and the output file path.
+_Instance = collections.namedtuple("_Instance", ("import_tracer", "converter", "filepath"))
+
+# The currently active session, or None while tracing is not enabled.
+INSTANCE = None
+
+
+def _get_converter_instance():
+    """Build the converter selected by the ``Y_PYTHON_TRACE_FORMAT`` environment variable.
+
+    ``"evlog"`` selects ``ChromiumTraceConverter``; ``"text"``, an unset
+    variable or any other value selects ``RawTextTraceConverter``.
+    """
+    import library.python.import_tracing.lib.converters.raw as text_converter
+    import library.python.import_tracing.lib.converters.chrometrace as chrome_converter
+
+    requested_format = os.getenv("Y_PYTHON_TRACE_FORMAT")
+
+    if requested_format == "evlog":
+        return chrome_converter.ChromiumTraceConverter()
+
+    # "text" and every unrecognized value share the plain-text default.
+    return text_converter.RawTextTraceConverter()
+
+
+def _resolve_filepath(filemask):
+    """Expand the placeholders in ``filemask`` and return the resulting path.
+
+    ``%p`` becomes the current process id, ``%h`` the host name and ``%e``
+    the base name of ``sys.executable``; they are substituted in that order.
+    """
+    import socket
+    import sys
+
+    substitutions = (
+        ("%p", str(os.getpid())),
+        ("%h", socket.gethostname()),
+        ("%e", os.path.basename(sys.executable)),
+    )
+
+    resolved = filemask
+    for placeholder, value in substitutions:
+        resolved = resolved.replace(placeholder, value)
+
+    return resolved
+
+
+def enable(filemask):
+    """Start import tracing and return the active ``_Instance``.
+
+    If tracing is already enabled the existing instance is returned
+    unchanged. Otherwise a converter and a tracer are created, the tracer is
+    hooked into ``__res.importer`` through before/after import callbacks, and
+    ``filemask`` is resolved into the output file path.
+    """
+    import library.python.import_tracing.lib.import_tracer as tracer_module
+    import __res
+
+    global INSTANCE
+
+    if INSTANCE is not None:
+        return INSTANCE
+
+    converter = _get_converter_instance()
+    tracer = tracer_module.ImportTracer()
+
+    def on_import_started(modname, filename):
+        tracer.start_event(modname, filename)
+
+    def on_import_finished(modname, filename):
+        tracer.finish_event(modname, filename)
+
+    __res.importer.set_callbacks(on_import_started, on_import_finished)
+
+    INSTANCE = _Instance(tracer, converter, _resolve_filepath(filemask))
+
+    return INSTANCE
+
+
+def disable(close_not_finished=False):
+    """Write the collected events to the output file and clear the active instance.
+
+    Does nothing when tracing is not enabled. ``close_not_finished`` is
+    forwarded to ``ImportTracer.get_events``.
+
+    NOTE(review): the importer callbacks installed by ``enable`` are not
+    removed here -- confirm whether that is intended.
+    """
+    global INSTANCE
+
+    active = INSTANCE
+    if active is None:
+        return
+
+    tracer, converter, filepath = active
+    converter.dump(tracer.get_events(close_not_finished), filepath)
+
+    INSTANCE = None

Some files were not shown because too many files changed in this diff