Skip to content

bpo-34861 Make cProfile default output more useful #9655

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Lib/cProfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def print_stats(self, sort=-1):
import pstats
if not isinstance(sort, tuple):
sort = (sort,)
pstats.Stats(self).strip_dirs().sort_stats(*sort).print_stats()
pstats.Stats(self).strip_non_unique_dirs().sort_stats(*sort).print_stats()

def dump_stats(self, file):
import marshal
Expand Down
65 changes: 62 additions & 3 deletions Lib/pstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,12 @@
import marshal
import re

from collections import Counter
from enum import StrEnum, _simple_enum
from functools import cmp_to_key
from dataclasses import dataclass
from os.path import join
from pathlib import Path

__all__ = ["Stats", "SortKey", "FunctionProfile", "StatsProfile"]

Expand Down Expand Up @@ -276,16 +279,19 @@ def reverse_order(self):
return self

def strip_dirs(self):
    """Rewrite every function key in the stats using func_strip_path().

    Returns whatever _strip_directory_data() returns.
    """
    # func_strip_path is applied to one function key at a time, so it can be
    # handed over as the strip callback as-is.
    return self._strip_directory_data(func_strip_path)

def _strip_directory_data(self, strip_function):
oldstats = self.stats
self.stats = newstats = {}
max_name_len = 0
for func, (cc, nc, tt, ct, callers) in oldstats.items():
newfunc = func_strip_path(func)
newfunc = strip_function(func)
if len(func_std_string(newfunc)) > max_name_len:
max_name_len = len(func_std_string(newfunc))
newcallers = {}
for func2, caller in callers.items():
newcallers[func_strip_path(func2)] = caller
newcallers[strip_function(func2)] = caller

if newfunc in newstats:
newstats[newfunc] = add_func_stats(
Expand All @@ -296,14 +302,30 @@ def strip_dirs(self):
old_top = self.top_level
self.top_level = new_top = set()
for func in old_top:
new_top.add(func_strip_path(func))
new_top.add(strip_function(func))

self.max_name_len = max_name_len

self.fcn_list = None
self.all_callees = None
return self

def strip_non_unique_dirs(self):
    """Replace each file path in the stats with its minimal unique form.

    Gathers the file path of every profiled function and of every caller
    recorded for it, asks _build_minimal_path_by_full_path() for the
    replacement of each path, and rewrites the stats through
    _strip_directory_data().  Returns whatever _strip_directory_data()
    returns.
    """
    seen_paths = set()
    for func, (_cc, _nc, _tt, _ct, callers) in self.stats.items():
        path, _line, _name = func
        seen_paths.add(path)
        # Callers are keyed by the same (path, line, name) triples.
        for caller_path, _caller_line, _caller_name in callers:
            seen_paths.add(caller_path)

    short_path_for = _build_minimal_path_by_full_path(seen_paths)

    def shorten(func):
        path, line, name = func
        return short_path_for[path], line, name

    return self._strip_directory_data(shorten)

def calc_callees(self):
if self.all_callees:
return
Expand Down Expand Up @@ -774,4 +796,41 @@ def postcmd(self, stop, line):
except KeyboardInterrupt:
pass


def _build_minimal_path_by_full_path(paths):
if not paths:
return paths

completed = {
full_path: None
for full_path in paths
}
split_path_by_full = {full_path: Path(full_path).parts for full_path in paths}
max_step = max(len(x) for x in split_path_by_full.values())
step = 1
while step <= max_step:
short_path_by_full = {
full_path: split_path_by_full[full_path][-step:]
for full_path, value in completed.items()
if value is None
}
full_path_by_short = {v: k for k, v in short_path_by_full.items()}

for short_path, count in Counter(short_path_by_full.values()).most_common():
if count == 1:
joined_short_path = join(*short_path)
# __init__.py is handled specially because it's a very common
# file name which gives no clue what file is meant
if joined_short_path == '__init__.py':
continue
completed[full_path_by_short[short_path]] = joined_short_path

step += 1

return {
full_path: short_path if short_path is not None else full_path
for full_path, short_path in completed.items()
}


# That's all, folks.
63 changes: 63 additions & 0 deletions Lib/test/test_pstats.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import unittest
from os.path import join

from test import support
from io import StringIO
from pstats import SortKey
from pstats import _build_minimal_path_by_full_path
from enum import StrEnum, _test_simple_enum

import os
Expand Down Expand Up @@ -148,5 +150,66 @@ def test_SortKey_enum(self):
self.assertEqual(SortKey.FILENAME, 'filename')
self.assertNotEqual(SortKey.FILENAME, SortKey.CALLS)

class BuildMinimalPathTests(unittest.TestCase):
    """Tests for pstats._build_minimal_path_by_full_path()."""

    def assert_minimal_paths(self, full_paths, expected):
        """Check that *full_paths* are shortened to the *expected* mapping."""
        self.assertEqual(_build_minimal_path_by_full_path(full_paths), expected)

    def test_non_unique(self):
        # The same path listed twice yields a single entry.
        self.assert_minimal_paths(
            ['foo/bar', 'foo/bar'],
            {'foo/bar': 'bar'},
        )

    def test_needs_no_minimizing(self):
        # Single-component paths are already as short as they can be.
        self.assert_minimal_paths(
            ['foo', 'bar'],
            {'foo': 'foo', 'bar': 'bar'},
        )

    def test_normal_case(self):
        foo_bar = join('foo', 'bar')
        baz_bar = join('baz', 'bar')
        apple_orange = join('apple', 'orange')
        # 'bar' is ambiguous and keeps its directory; 'orange' is not.
        self.assert_minimal_paths(
            [foo_bar, baz_bar, apple_orange],
            {
                foo_bar: foo_bar,
                baz_bar: baz_bar,
                apple_orange: 'orange',
            },
        )

    def test_intermediate(self):
        via_orange = join('apple', 'mango', 'orange', 'grape', 'melon')
        via_lemon = join('apple', 'mango', 'lemon', 'grape', 'melon')
        # The paths differ only in the middle, so the short form reaches
        # back to the first differing component.
        self.assert_minimal_paths(
            [via_orange, via_lemon],
            {
                via_orange: join('orange', 'grape', 'melon'),
                via_lemon: join('lemon', 'grape', 'melon'),
            },
        )

    def test_dunder_init_special_case(self):
        init_path = join('apple', 'mango', 'orange', 'grape', '__init__.py')
        # A bare __init__.py is never used; one parent directory is kept.
        self.assert_minimal_paths(
            [init_path],
            {init_path: join('grape', '__init__.py')},
        )


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
cProfile command line output now defaults to cumulative time
cProfile command line output now shows unique filenames (plus one extra level of the path if the filename is __init__.py)
Loading