# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 # For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt """Bytecode analysis for coverage.py""" from __future__ import annotations import dis from types import CodeType from typing import Iterable, Optional from collections.abc import Iterator from coverage.types import TArc, TOffset def code_objects(code: CodeType) -> Iterator[CodeType]: """Iterate over all the code objects in `code`.""" stack = [code] while stack: # We're going to return the code object on the stack, but first # push its children for later returning. code = stack.pop() for c in code.co_consts: if isinstance(c, CodeType): stack.append(c) yield code def op_set(*op_names: str) -> set[int]: """Make a set of opcodes from instruction names. The names might not exist in this version of Python, skip those if not. """ return {op for name in op_names if (op := dis.opmap.get(name))} # Opcodes that are unconditional jumps elsewhere. ALWAYS_JUMPS = op_set( "JUMP_BACKWARD", "JUMP_BACKWARD_NO_INTERRUPT", "JUMP_FORWARD", ) # Opcodes that exit from a function. RETURNS = op_set("RETURN_VALUE", "RETURN_GENERATOR") class InstructionWalker: """Utility to step through trails of instructions. We have two reasons to need sequences of instructions from a code object: First, in strict sequence to visit all the instructions in the object. This is `walk(follow_jumps=False)`. Second, we want to follow jumps to understand how execution will flow: `walk(follow_jumps=True)`. """ def __init__(self, code: CodeType) -> None: self.code = code self.insts: dict[TOffset, dis.Instruction] = {} inst = None for inst in dis.get_instructions(code): self.insts[inst.offset] = inst assert inst is not None self.max_offset = inst.offset def walk( self, *, start_at: TOffset = 0, follow_jumps: bool = True ) -> Iterable[dis.Instruction]: """ Yield instructions starting from `start_at`. Follow unconditional jumps if `follow_jumps` is true. """ seen = set() offset = start_at while offset < self.max_offset + 1: if offset in seen: break seen.add(offset) if inst := self.insts.get(offset): yield inst if follow_jumps and inst.opcode in ALWAYS_JUMPS: offset = inst.jump_target continue offset += 2 TBranchTrail = tuple[set[TOffset], Optional[TArc]] TBranchTrails = dict[TOffset, list[TBranchTrail]] def branch_trails(code: CodeType) -> TBranchTrails: """ Calculate branch trails for `code`. Instructions can have a jump_target, where they might jump to next. Some instructions with a jump_target are unconditional jumps (ALWAYS_JUMPS), so they aren't interesting to us, since they aren't the start of a branch possibility. Instructions that might or might not jump somewhere else are branch possibilities. For each of those, we track a trail of instructions. These are lists of instruction offsets, the next instructions that can execute. We follow the trail until we get to a new source line. That gives us the arc from the original instruction's line to the new source line. """ the_trails: TBranchTrails = {} iwalker = InstructionWalker(code) for inst in iwalker.walk(follow_jumps=False): if not inst.jump_target: # We only care about instructions with jump targets. continue if inst.opcode in ALWAYS_JUMPS: # We don't care about unconditional jumps. continue from_line = inst.line_number if from_line is None: continue def walk_one_branch(start_at: TOffset) -> TBranchTrail: # pylint: disable=cell-var-from-loop inst_offsets: set[TOffset] = set() to_line = None for inst2 in iwalker.walk(start_at=start_at): inst_offsets.add(inst2.offset) if inst2.line_number and inst2.line_number != from_line: to_line = inst2.line_number break elif inst2.jump_target and (inst2.opcode not in ALWAYS_JUMPS): break elif inst2.opcode in RETURNS: to_line = -code.co_firstlineno break if to_line is not None: return inst_offsets, (from_line, to_line) else: return set(), None # Calculate two trails: one from the next instruction, and one from the # jump_target instruction. trails = [ walk_one_branch(start_at=inst.offset + 2), walk_one_branch(start_at=inst.jump_target), ] the_trails[inst.offset] = trails # Sometimes we get BRANCH_RIGHT or BRANCH_LEFT events from instructions # other than the original jump possibility instruction. Register each # trail under all of their offsets so we can pick up in the middle of a # trail if need be. for trail in trails: for offset in trail[0]: if offset not in the_trails: the_trails[offset] = [] the_trails[offset].append(trail) return the_trails