From f6da3b61561547588420608bbcf008c227ea7139 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Anders=20Hovm=C3=B6ller?=
Date: Mon, 1 Oct 2018 13:06:12 +0200
Subject: [PATCH] Default cProfile command line to cumulative time

Add pstats.Stats.strip_non_unique_dirs(), which shortens file names to
their shortest unique path and keeps one folder level when the filename
starts with __ (__init__.py, __main__.py primarily).
---
 Lib/cProfile.py                               |  2 +-
 Lib/pstats.py                                 | 71 +++++++++++++++++-
 Lib/test/test_pstats.py                       | 76 +++++++++++++++++++
 .../2020-10-18-13-20-00.bpo-34861.8JanqQ.rst  |  2 +
 4 files changed, 147 insertions(+), 4 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Library/2020-10-18-13-20-00.bpo-34861.8JanqQ.rst

diff --git a/Lib/cProfile.py b/Lib/cProfile.py
index e7c868b8d55543..a2b708a9d7a9db 100644
--- a/Lib/cProfile.py
+++ b/Lib/cProfile.py
@@ -41,7 +41,7 @@ def print_stats(self, sort=-1):
         import pstats
         if not isinstance(sort, tuple):
             sort = (sort,)
-        pstats.Stats(self).strip_dirs().sort_stats(*sort).print_stats()
+        pstats.Stats(self).strip_non_unique_dirs().sort_stats(*sort).print_stats()

     def dump_stats(self, file):
         import marshal
diff --git a/Lib/pstats.py b/Lib/pstats.py
index becaf35580eaee..7390da3beaa2ef 100644
--- a/Lib/pstats.py
+++ b/Lib/pstats.py
@@ -26,9 +26,12 @@
 import marshal
 import re

+from collections import Counter
 from enum import StrEnum, _simple_enum
 from functools import cmp_to_key
 from dataclasses import dataclass
+from os.path import join
+from pathlib import Path

 __all__ = ["Stats", "SortKey", "FunctionProfile", "StatsProfile"]

@@ -276,16 +279,20 @@ def reverse_order(self):
         return self

     def strip_dirs(self):
+        return self._strip_directory_data(func_strip_path)
+
+    def _strip_directory_data(self, strip_function):
+        """Rewrite every function key with strip_function; return self."""
         oldstats = self.stats
         self.stats = newstats = {}
         max_name_len = 0
         for func, (cc, nc, tt, ct, callers) in oldstats.items():
-            newfunc = func_strip_path(func)
+            newfunc = strip_function(func)
             if len(func_std_string(newfunc)) > max_name_len:
                 max_name_len = len(func_std_string(newfunc))
             newcallers = {}
             for func2, caller in callers.items():
-                newcallers[func_strip_path(func2)] = caller
+                newcallers[strip_function(func2)] = caller

             if newfunc in newstats:
                 newstats[newfunc] = add_func_stats(
@@ -296,7 +303,7 @@ def strip_dirs(self):
         old_top = self.top_level
         self.top_level = new_top = set()
         for func in old_top:
-            new_top.add(func_strip_path(func))
+            new_top.add(strip_function(func))

         self.max_name_len = max_name_len

@@ -304,6 +311,27 @@ def strip_dirs(self):
         self.all_callees = None
         return self

+    def strip_non_unique_dirs(self):
+        """Shorten every file name to its shortest unique path suffix.
+
+        File names starting with '__' (such as __init__.py) also keep
+        their parent directory when they have one.  Returns self.
+        """
+        full_paths = set()
+        for (full_path, _, _), (_, _, _, _, callers) in self.stats.items():
+            full_paths.add(full_path)
+            full_paths.update(caller_path for caller_path, _, _ in callers)
+        # top_level keys are rewritten too, so make sure they can be looked up.
+        full_paths.update(top_path for top_path, _, _ in self.top_level)
+
+        minimal_path_by_full_path = _build_minimal_path_by_full_path(full_paths)
+
+        def strip_function(fullpath_linenum_funcname):
+            fullpath, linenum, funcname = fullpath_linenum_funcname
+            return minimal_path_by_full_path[fullpath], linenum, funcname
+
+        return self._strip_directory_data(strip_function)
+
     def calc_callees(self):
         if self.all_callees:
             return
@@ -774,4 +802,41 @@ def postcmd(self, stop, line):
     except KeyboardInterrupt:
         pass

+
+def _build_minimal_path_by_full_path(paths):
+    """Map each path in paths to its shortest unique trailing part.
+
+    A path is shortened to the fewest trailing components that no other
+    path shares.  A bare file name starting with '__' (__init__.py,
+    __main__.py, ...) says little on its own, so such files keep at
+    least one directory level.  Paths that cannot be told apart are
+    left unchanged.  An empty input gives an empty dict.
+    """
+    completed = dict.fromkeys(paths)
+    split_path_by_full = {
+        full_path: Path(full_path).parts for full_path in completed
+    }
+    max_step = max(map(len, split_path_by_full.values()), default=0)
+    for step in range(1, max_step + 1):
+        short_path_by_full = {
+            full_path: split_path_by_full[full_path][-step:]
+            for full_path, value in completed.items()
+            if value is None
+        }
+        counts = Counter(short_path_by_full.values())
+        for full_path, short_path in short_path_by_full.items():
+            if counts[short_path] != 1 or not short_path:
+                continue
+            # A lone dunder file name is too common to identify a file,
+            # so wait for a later step that includes its directory.
+            if len(short_path) == 1 and short_path[0].startswith('__'):
+                continue
+            completed[full_path] = join(*short_path)
+
+    return {
+        full_path: short_path if short_path is not None else full_path
+        for full_path, short_path in completed.items()
+    }
+
+
 # That's all, folks.
diff --git a/Lib/test/test_pstats.py b/Lib/test/test_pstats.py
index d5a5a9738c2498..e5cc64834fdd21 100644
--- a/Lib/test/test_pstats.py
+++ b/Lib/test/test_pstats.py
@@ -1,8 +1,10 @@
 import unittest

+from os.path import join
 from test import support
 from io import StringIO
 from pstats import SortKey
+from pstats import _build_minimal_path_by_full_path
 from enum import StrEnum, _test_simple_enum

 import os
@@ -148,5 +150,79 @@ def test_SortKey_enum(self):
         self.assertEqual(SortKey.FILENAME, 'filename')
         self.assertNotEqual(SortKey.FILENAME, SortKey.CALLS)

+class BuildMinimalPathTests(unittest.TestCase):
+    def test_non_unique(self):
+        self.assertEqual(
+            _build_minimal_path_by_full_path([
+                'foo/bar',
+                'foo/bar',
+            ]),
+            {
+                'foo/bar': 'bar',
+            },
+        )
+
+    def test_needs_no_minimizing(self):
+        self.assertEqual(
+            _build_minimal_path_by_full_path([
+                'foo',
+                'bar',
+            ]),
+            {
+                'foo': 'foo',
+                'bar': 'bar',
+            },
+        )
+
+    def test_normal_case(self):
+        self.assertEqual(
+            _build_minimal_path_by_full_path([
+                join('foo', 'bar'),
+                join('baz', 'bar'),
+                join('apple', 'orange'),
+            ]),
+            {
+                join('foo', 'bar'): join('foo', 'bar'),
+                join('baz', 'bar'): join('baz', 'bar'),
+                join('apple', 'orange'): 'orange',
+            }
+        )
+
+    def test_intermediate(self):
+        self.assertEqual(
+            _build_minimal_path_by_full_path([
+                join('apple', 'mango', 'orange', 'grape', 'melon'),
+                join('apple', 'mango', 'lemon', 'grape', 'melon'),
+            ]),
+            {
+                join('apple', 'mango', 'orange', 'grape', 'melon'): join('orange', 'grape', 'melon'),
+                join('apple', 'mango', 'lemon', 'grape', 'melon'): join('lemon', 'grape', 'melon'),
+            }
+        )
+
+    def test_dunder_init_special_case(self):
+        self.assertEqual(
+            _build_minimal_path_by_full_path([
+                join('apple', 'mango', 'orange', 'grape', '__init__.py'),
+            ]),
+            {
+                join('apple', 'mango', 'orange', 'grape', '__init__.py'): join('grape', '__init__.py'),
+            }
+        )
+
+    def test_dunder_main_special_case(self):
+        self.assertEqual(
+            _build_minimal_path_by_full_path([
+                join('apple', 'mango', '__main__.py'),
+            ]),
+            {
+                join('apple', 'mango', '__main__.py'): join('mango', '__main__.py'),
+            }
+        )
+
+    def test_empty(self):
+        self.assertEqual(_build_minimal_path_by_full_path([]), {})
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-10-18-13-20-00.bpo-34861.8JanqQ.rst b/Misc/NEWS.d/next/Library/2020-10-18-13-20-00.bpo-34861.8JanqQ.rst
new file mode 100644
index 00000000000000..3a97617ac3f02a
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-10-18-13-20-00.bpo-34861.8JanqQ.rst
@@ -0,0 +1,2 @@
+cProfile command line output now defaults to cumulative time.
+cProfile command line output now shows the shortest unique path for each file (with one extra directory level when the file name starts with ``__``, such as ``__init__.py``).