Skip to content

update test_difflib from CPython3.11.2 #5063

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 10 additions & 43 deletions Lib/difflib.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class SequenceMatcher:
notion, pairing up elements that appear uniquely in each sequence.
That, and the method here, appear to yield more intuitive difference
reports than does diff. This method appears to be the least vulnerable
to synching up on blocks of "junk lines", though (like blank lines in
to syncing up on blocks of "junk lines", though (like blank lines in
ordinary text files, or maybe "<P>" lines in HTML files). That may be
because this is the only method of the 3 that has a *concept* of
"junk" <wink>.
Expand Down Expand Up @@ -115,38 +115,6 @@ class SequenceMatcher:
case. SequenceMatcher is quadratic time for the worst case and has
expected-case behavior dependent in a complicated way on how many
elements the sequences have in common; best case time is linear.

Methods:

__init__(isjunk=None, a='', b='')
Construct a SequenceMatcher.

set_seqs(a, b)
Set the two sequences to be compared.

set_seq1(a)
Set the first sequence to be compared.

set_seq2(b)
Set the second sequence to be compared.

find_longest_match(alo, ahi, blo, bhi)
Find longest matching block in a[alo:ahi] and b[blo:bhi].

get_matching_blocks()
Return list of triples describing matching subsequences.

get_opcodes()
Return list of 5-tuples describing how to turn a into b.

ratio()
Return a measure of the sequences' similarity (float in [0,1]).

quick_ratio()
Return an upper bound on .ratio() relatively quickly.

real_quick_ratio()
Return an upper bound on ratio() very quickly.
"""

def __init__(self, isjunk=None, a='', b='', autojunk=True):
Expand Down Expand Up @@ -334,9 +302,11 @@ def __chain_b(self):
for elt in popular: # ditto; as fast for 1% deletion
del b2j[elt]

def find_longest_match(self, alo, ahi, blo, bhi):
def find_longest_match(self, alo=0, ahi=None, blo=0, bhi=None):
"""Find longest matching block in a[alo:ahi] and b[blo:bhi].

By default it will find the longest match in the entirety of a and b.

If isjunk is not defined:

Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
Expand Down Expand Up @@ -391,6 +361,10 @@ def find_longest_match(self, alo, ahi, blo, bhi):
# the unique 'b's and then matching the first two 'a's.

a, b, b2j, isbjunk = self.a, self.b, self.b2j, self.bjunk.__contains__
if ahi is None:
ahi = len(a)
if bhi is None:
bhi = len(b)
besti, bestj, bestsize = alo, blo, 0
# find longest junk-free match
# during an iteration of the loop, j2len[j] = length of longest
Expand Down Expand Up @@ -688,6 +662,7 @@ def real_quick_ratio(self):

__class_getitem__ = classmethod(GenericAlias)


def get_close_matches(word, possibilities, n=3, cutoff=0.6):
"""Use SequenceMatcher to return list of the best "good enough" matches.

Expand Down Expand Up @@ -830,14 +805,6 @@ class Differ:
+ 4. Complicated is better than complex.
? ++++ ^ ^
+ 5. Flat is better than nested.

Methods:

__init__(linejunk=None, charjunk=None)
Construct a text differencer, with optional filters.

compare(a, b)
Compare two sequences of lines; generate the resulting delta.
"""

def __init__(self, linejunk=None, charjunk=None):
Expand Down Expand Up @@ -870,7 +837,7 @@ def compare(self, a, b):
Each sequence must contain individual single-line strings ending with
newlines. Such sequences can be obtained from the `readlines()` method
of file-like objects. The delta generated also consists of newline-
terminated strings, ready to be printed as-is via the writeline()
terminated strings, ready to be printed as-is via the writelines()
method of a file-like object.

Example:
Expand Down
64 changes: 56 additions & 8 deletions Lib/test/test_difflib.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import difflib
from test.support import run_unittest, findfile
from test.support import findfile
import unittest
import doctest
import sys
Expand Down Expand Up @@ -241,7 +241,7 @@ def test_html_diff(self):
#with open('test_difflib_expect.html','w') as fp:
# fp.write(actual)

with open(findfile('test_difflib_expect.html')) as fp:
with open(findfile('test_difflib_expect.html'), encoding="utf-8") as fp:
self.assertEqual(actual, fp.read())

def test_recursion_limit(self):
Expand Down Expand Up @@ -503,12 +503,60 @@ def test_is_character_junk_false(self):
for char in ['a', '#', '\n', '\f', '\r', '\v']:
self.assertFalse(difflib.IS_CHARACTER_JUNK(char), repr(char))

def test_main():
class TestFindLongest(unittest.TestCase):
    """Tests for SequenceMatcher.find_longest_match().

    Covers the default-argument form (no explicit alo/ahi/blo/bhi) and a
    case where ``b`` is long enough for the "popular element" heuristic to
    be in play.
    """

    def longer_match_exists(self, a, b, n):
        """Return True if any slice of ``b`` of length ``n + 1`` occurs in ``a``.

        Used to confirm that a reported match of size ``n`` really is the
        longest one: if this returns True, a strictly longer common block
        exists and the reported match was not maximal.
        """
        # A window of length n + 1 may start at any index from 0 through
        # len(b) - n - 1 inclusive, so the exclusive stop is len(b) - n.
        # (Stopping one earlier would silently skip the final window.)
        return any(b[i:i + n + 1] in a for i in range(len(b) - n))

    def test_default_args(self):
        a = 'foo bar'
        b = 'foo baz bar'
        sm = difflib.SequenceMatcher(a=a, b=b)

        # No bounds given: search all of a and all of b.
        match = sm.find_longest_match()
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 0)
        self.assertEqual(match.size, 6)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))

        # Only the lower bounds given: upper bounds default to the ends.
        match = sm.find_longest_match(alo=2, blo=4)
        self.assertEqual(match.a, 3)
        self.assertEqual(match.b, 7)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a[2:], b[4:], match.size))

        # Bounds on b only: a is searched in full.
        match = sm.find_longest_match(bhi=5, blo=1)
        self.assertEqual(match.a, 1)
        self.assertEqual(match.b, 1)
        self.assertEqual(match.size, 4)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b[1:5], match.size))

    def test_longest_match_with_popular_chars(self):
        a = 'dabcd'
        b = 'd'*100 + 'abc' + 'd'*100  # length over 200 so popular used
        sm = difflib.SequenceMatcher(a=a, b=b)
        match = sm.find_longest_match(0, len(a), 0, len(b))
        self.assertEqual(match.a, 0)
        self.assertEqual(match.b, 99)
        self.assertEqual(match.size, 5)
        self.assertEqual(a[match.a: match.a + match.size],
                         b[match.b: match.b + match.size])
        self.assertFalse(self.longer_match_exists(a, b, match.size))


def setUpModule():
    """Module-level fixture: set ``difflib.HtmlDiff._default_prefix`` to 0.

    NOTE(review): presumably this keeps the generated HTML reproducible so it
    can be compared against the stored expected file -- confirm against
    ``HtmlDiff``'s implementation.
    """
    html_diff_cls = difflib.HtmlDiff
    html_diff_cls._default_prefix = 0
Doctests = doctest.DocTestSuite(difflib)
run_unittest(
TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
TestOutputFormat, TestBytes, TestJunkAPIs, Doctests)


def load_tests(loader, tests, pattern):
    """Add difflib's doctests to the suite and return it.

    The signature follows unittest's ``load_tests`` hook; ``loader`` and
    ``pattern`` are accepted but not used here.
    """
    doctest_suite = doctest.DocTestSuite(difflib)
    tests.addTest(doctest_suite)
    return tests


if __name__ == '__main__':
test_main()
unittest.main()