# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt """Callback functions and support for sys.monitoring data collection.""" from __future__ import annotations import functools import inspect import os import os.path import sys import threading import traceback from dataclasses import dataclass from types import CodeType from typing import ( Any, Callable, NewType, Optional, cast, ) from coverage import env from coverage.bytecode import TBranchTrails, branch_trails from coverage.debug import short_filename, short_stack from coverage.misc import isolate_module from coverage.types import ( AnyCallable, TFileDisposition, TLineNo, TOffset, TShouldStartContextFn, TShouldTraceFn, TTraceData, TTraceFileData, Tracer, TWarnFn, ) os = isolate_module(os) # pylint: disable=unused-argument # $set_env.py: COVERAGE_SYSMON_LOG - Log sys.monitoring activity LOG = bool(int(os.getenv("COVERAGE_SYSMON_LOG", 0))) # $set_env.py: COVERAGE_SYSMON_STATS - Collect sys.monitoring stats COLLECT_STATS = bool(int(os.getenv("COVERAGE_SYSMON_STATS", 0))) # This module will be imported in all versions of Python, but only used in 3.12+ # It will be type-checked for 3.12, but not for earlier versions. sys_monitoring = getattr(sys, "monitoring", None) DISABLE_TYPE = NewType("DISABLE_TYPE", object) MonitorReturn = Optional[DISABLE_TYPE] DISABLE = cast(MonitorReturn, getattr(sys_monitoring, "DISABLE", None)) if LOG: # pragma: debugging class LoggingWrapper: """Wrap a namespace to log all its functions.""" def __init__(self, wrapped: Any, namespace: str) -> None: self.wrapped = wrapped self.namespace = namespace def __getattr__(self, name: str) -> Callable[..., Any]: def _wrapped(*args: Any, **kwargs: Any) -> Any: log(f"{self.namespace}.{name}{args}{kwargs}") return getattr(self.wrapped, name)(*args, **kwargs) return _wrapped sys_monitoring = LoggingWrapper(sys_monitoring, "sys.monitoring") assert sys_monitoring is not None short_stack = functools.partial( short_stack, full=True, short_filenames=True, frame_ids=True, ) seen_threads: set[int] = set() def log(msg: str) -> None: """Write a message to our detailed debugging log(s).""" # Thread ids are reused across processes? # Make a shorter number more likely to be unique. pid = os.getpid() tid = cast(int, threading.current_thread().ident) tslug = f"{(pid * tid) % 9_999_991:07d}" if tid not in seen_threads: seen_threads.add(tid) log(f"New thread {tid} {tslug}:\n{short_stack()}") # log_seq = int(os.getenv("PANSEQ", "0")) # root = f"/tmp/pan.{log_seq:03d}" for filename in [ "/tmp/foo.out", # f"{root}.out", # f"{root}-{pid}.out", # f"{root}-{pid}-{tslug}.out", ]: with open(filename, "a", encoding="utf-8") as f: try: print(f"{pid}:{tslug}: {msg}", file=f, flush=True) except UnicodeError: print(f"{pid}:{tslug}: {ascii(msg)}", file=f, flush=True) def arg_repr(arg: Any) -> str: """Make a customized repr for logged values.""" if isinstance(arg, CodeType): return ( f"" ) return repr(arg) def panopticon(*names: str | None) -> AnyCallable: """Decorate a function to log its calls.""" def _decorator(method: AnyCallable) -> AnyCallable: @functools.wraps(method) def _wrapped(self: Any, *args: Any) -> Any: try: # log(f"{method.__name__}() stack:\n{short_stack()}") args_reprs = [] for name, arg in zip(names, args): if name is None: continue args_reprs.append(f"{name}={arg_repr(arg)}") log(f"{id(self):#x}:{method.__name__}({', '.join(args_reprs)})") ret = method(self, *args) # log(f" end {id(self):#x}:{method.__name__}({', '.join(args_reprs)})") return ret except Exception as exc: log(f"!!{exc.__class__.__name__}: {exc}") if 1: log("".join(traceback.format_exception(exc))) try: assert sys_monitoring is not None sys_monitoring.set_events(sys.monitoring.COVERAGE_ID, 0) except ValueError: # We might have already shut off monitoring. log("oops, shutting off events with disabled tool id") raise return _wrapped return _decorator else: def log(msg: str) -> None: """Write a message to our detailed debugging log(s), but not really.""" def panopticon(*names: str | None) -> AnyCallable: """Decorate a function to log its calls, but not really.""" def _decorator(meth: AnyCallable) -> AnyCallable: return meth return _decorator @dataclass class CodeInfo: """The information we want about each code object.""" tracing: bool file_data: TTraceFileData | None byte_to_line: dict[TOffset, TLineNo] | None # Keys are start instruction offsets for branches. # Values are lists: # [ # ([offset, offset, ...], (from_line, to_line)), # ([offset, offset, ...], (from_line, to_line)), # ] # Two possible trails from the branch point, left and right. branch_trails: TBranchTrails def bytes_to_lines(code: CodeType) -> dict[TOffset, TLineNo]: """Make a dict mapping byte code offsets to line numbers.""" b2l = {} for bstart, bend, lineno in code.co_lines(): if lineno is not None: for boffset in range(bstart, bend, 2): b2l[boffset] = lineno return b2l class SysMonitor(Tracer): """Python implementation of the raw data tracer for PEP669 implementations.""" # One of these will be used across threads. Be careful. def __init__(self, tool_id: int) -> None: # Attributes set from the collector: self.data: TTraceData self.trace_arcs = False self.should_trace: TShouldTraceFn self.should_trace_cache: dict[str, TFileDisposition | None] # TODO: should_start_context and switch_context are unused! # Change tests/testenv.py:DYN_CONTEXTS when this is updated. self.should_start_context: TShouldStartContextFn | None = None self.switch_context: Callable[[str | None], None] | None = None self.lock_data: Callable[[], None] self.unlock_data: Callable[[], None] # TODO: warn is unused. self.warn: TWarnFn self.myid = tool_id # Map id(code_object) -> CodeInfo self.code_infos: dict[int, CodeInfo] = {} # A list of code_objects, just to keep them alive so that id's are # useful as identity. self.code_objects: list[CodeType] = [] self.sysmon_on = False self.lock = threading.Lock() self.stats: dict[str, int] | None = None if COLLECT_STATS: self.stats = dict.fromkeys( "starts start_tracing returns line_lines line_arcs branches branch_trails".split(), 0, ) self._activity = False def __repr__(self) -> str: points = sum(len(v) for v in self.data.values()) files = len(self.data) return f"" @panopticon() def start(self) -> None: """Start this Tracer.""" with self.lock: assert sys_monitoring is not None sys_monitoring.use_tool_id(self.myid, "coverage.py") register = functools.partial(sys_monitoring.register_callback, self.myid) events = sys.monitoring.events sys_monitoring.set_events(self.myid, events.PY_START) register(events.PY_START, self.sysmon_py_start) if self.trace_arcs: register(events.PY_RETURN, self.sysmon_py_return) register(events.LINE, self.sysmon_line_arcs) if env.PYBEHAVIOR.branch_right_left: register( events.BRANCH_RIGHT, # type:ignore[attr-defined] self.sysmon_branch_either, ) register( events.BRANCH_LEFT, # type:ignore[attr-defined] self.sysmon_branch_either, ) else: register(events.LINE, self.sysmon_line_lines) sys_monitoring.restart_events() self.sysmon_on = True @panopticon() def stop(self) -> None: """Stop this Tracer.""" with self.lock: if not self.sysmon_on: # In forking situations, we might try to stop when we are not # started. Do nothing in that case. return assert sys_monitoring is not None sys_monitoring.set_events(self.myid, 0) self.sysmon_on = False sys_monitoring.free_tool_id(self.myid) @panopticon() def post_fork(self) -> None: """The process has forked, clean up as needed.""" self.stop() def activity(self) -> bool: """Has there been any activity?""" return self._activity def reset_activity(self) -> None: """Reset the activity() flag.""" self._activity = False def get_stats(self) -> dict[str, int] | None: """Return a dictionary of statistics, or None.""" return self.stats @panopticon("code", "@") def sysmon_py_start( self, code: CodeType, instruction_offset: TOffset ) -> MonitorReturn: """Handle sys.monitoring.events.PY_START events.""" # Entering a new frame. Decide if we should trace in this file. self._activity = True if self.stats is not None: self.stats["starts"] += 1 code_info = self.code_infos.get(id(code)) tracing_code: bool | None = None file_data: TTraceFileData | None = None if code_info is not None: tracing_code = code_info.tracing file_data = code_info.file_data if tracing_code is None: filename = code.co_filename disp = self.should_trace_cache.get(filename) if disp is None: frame = inspect.currentframe() if frame is not None: frame = inspect.currentframe().f_back # type: ignore[union-attr] if LOG: # @panopticon adds a frame. frame = frame.f_back # type: ignore[union-attr] disp = self.should_trace(filename, frame) # type: ignore[arg-type] self.should_trace_cache[filename] = disp tracing_code = disp.trace if tracing_code: tracename = disp.source_filename assert tracename is not None self.lock_data() try: if tracename not in self.data: self.data[tracename] = set() finally: self.unlock_data() file_data = self.data[tracename] b2l = bytes_to_lines(code) else: file_data = None b2l = None code_info = CodeInfo( tracing=tracing_code, file_data=file_data, byte_to_line=b2l, branch_trails={}, ) self.code_infos[id(code)] = code_info self.code_objects.append(code) if tracing_code: if self.stats is not None: self.stats["start_tracing"] += 1 events = sys.monitoring.events with self.lock: if self.sysmon_on: assert sys_monitoring is not None local_events = events.PY_RETURN | events.PY_RESUME | events.LINE if self.trace_arcs: assert env.PYBEHAVIOR.branch_right_left local_events |= ( events.BRANCH_RIGHT # type:ignore[attr-defined] | events.BRANCH_LEFT # type:ignore[attr-defined] ) sys_monitoring.set_local_events(self.myid, code, local_events) return DISABLE @panopticon("code", "@", None) def sysmon_py_return( self, code: CodeType, instruction_offset: TOffset, retval: object, ) -> MonitorReturn: """Handle sys.monitoring.events.PY_RETURN events for branch coverage.""" if self.stats is not None: self.stats["returns"] += 1 code_info = self.code_infos.get(id(code)) # code_info is not None and code_info.file_data is not None, since we # wouldn't have enabled this event if they were. last_line = code_info.byte_to_line[instruction_offset] # type: ignore if last_line is not None: arc = (last_line, -code.co_firstlineno) code_info.file_data.add(arc) # type: ignore # log(f"adding {arc=}") return DISABLE @panopticon("code", "line") def sysmon_line_lines(self, code: CodeType, line_number: TLineNo) -> MonitorReturn: """Handle sys.monitoring.events.LINE events for line coverage.""" if self.stats is not None: self.stats["line_lines"] += 1 code_info = self.code_infos.get(id(code)) # It should be true that code_info is not None and code_info.file_data # is not None, since we wouldn't have enabled this event if they were. # But somehow code_info can be None here, so we have to check. if code_info is not None and code_info.file_data is not None: code_info.file_data.add(line_number) # type: ignore # log(f"adding {line_number=}") return DISABLE @panopticon("code", "line") def sysmon_line_arcs(self, code: CodeType, line_number: TLineNo) -> MonitorReturn: """Handle sys.monitoring.events.LINE events for branch coverage.""" if self.stats is not None: self.stats["line_arcs"] += 1 code_info = self.code_infos[id(code)] # code_info is not None and code_info.file_data is not None, since we # wouldn't have enabled this event if they were. arc = (line_number, line_number) code_info.file_data.add(arc) # type: ignore # log(f"adding {arc=}") return DISABLE @panopticon("code", "@", "@") def sysmon_branch_either( self, code: CodeType, instruction_offset: TOffset, destination_offset: TOffset ) -> MonitorReturn: """Handle BRANCH_RIGHT and BRANCH_LEFT events.""" if self.stats is not None: self.stats["branches"] += 1 code_info = self.code_infos[id(code)] # code_info is not None and code_info.file_data is not None, since we # wouldn't have enabled this event if they were. if not code_info.branch_trails: if self.stats is not None: self.stats["branch_trails"] += 1 code_info.branch_trails = branch_trails(code) # log(f"branch_trails for {code}:\n {code_info.branch_trails}") added_arc = False dest_info = code_info.branch_trails.get(instruction_offset) # log(f"{dest_info = }") if dest_info is not None: for offsets, arc in dest_info: if arc is None: continue if destination_offset in offsets: code_info.file_data.add(arc) # type: ignore # log(f"adding {arc=}") added_arc = True break if not added_arc: # This could be an exception jumping from line to line. assert code_info.byte_to_line is not None l1 = code_info.byte_to_line[instruction_offset] l2 = code_info.byte_to_line.get(destination_offset) if l2 is not None and l1 != l2: arc = (l1, l2) code_info.file_data.add(arc) # type: ignore # log(f"adding unforeseen {arc=}") return DISABLE