Skip to content

gh-130197: Improve test coverage of msgfmt.py #133048

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 1, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 114 additions & 13 deletions Lib/test/test_tools/test_msgfmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,21 @@

from test.support.os_helper import temp_cwd
from test.support.script_helper import assert_python_failure, assert_python_ok
from test.test_tools import skip_if_missing, toolsdir
from test.test_tools import imports_under_tool, skip_if_missing, toolsdir


skip_if_missing('i18n')

data_dir = (Path(__file__).parent / 'msgfmt_data').resolve()
script_dir = Path(toolsdir) / 'i18n'
msgfmt = script_dir / 'msgfmt.py'
msgfmt_py = script_dir / 'msgfmt.py'

with imports_under_tool("i18n"):
import msgfmt


def compile_messages(po_file, mo_file):
assert_python_ok(msgfmt, '-o', mo_file, po_file)
assert_python_ok(msgfmt_py, '-o', mo_file, po_file)


class CompilationTest(unittest.TestCase):
Expand Down Expand Up @@ -92,7 +95,7 @@ def test_po_with_bom(self):
with temp_cwd():
Path('bom.po').write_bytes(b'\xef\xbb\xbfmsgid "Python"\nmsgstr "Pioton"\n')

res = assert_python_failure(msgfmt, 'bom.po')
res = assert_python_failure(msgfmt_py, 'bom.po')
err = res.err.decode('utf-8')
self.assertIn('The file bom.po starts with a UTF-8 BOM', err)

Expand All @@ -103,7 +106,7 @@ def test_invalid_msgid_plural(self):
msgstr[0] "singular"
''')

res = assert_python_failure(msgfmt, 'invalid.po')
res = assert_python_failure(msgfmt_py, 'invalid.po')
err = res.err.decode('utf-8')
self.assertIn('msgid_plural not preceded by msgid', err)

Expand All @@ -114,7 +117,7 @@ def test_plural_without_msgid_plural(self):
msgstr[0] "bar"
''')

res = assert_python_failure(msgfmt, 'invalid.po')
res = assert_python_failure(msgfmt_py, 'invalid.po')
err = res.err.decode('utf-8')
self.assertIn('plural without msgid_plural', err)

Expand All @@ -126,7 +129,7 @@ def test_indexed_msgstr_without_msgid_plural(self):
msgstr "bar"
''')

res = assert_python_failure(msgfmt, 'invalid.po')
res = assert_python_failure(msgfmt_py, 'invalid.po')
err = res.err.decode('utf-8')
self.assertIn('indexed msgstr required for plural', err)

Expand All @@ -136,38 +139,136 @@ def test_generic_syntax_error(self):
"foo"
''')

res = assert_python_failure(msgfmt, 'invalid.po')
res = assert_python_failure(msgfmt_py, 'invalid.po')
err = res.err.decode('utf-8')
self.assertIn('Syntax error', err)


class POParserTest(unittest.TestCase):
@classmethod
def tearDownClass(cls):
# msgfmt uses a global variable to store messages,
# clear it after the tests.
msgfmt.MESSAGES.clear()

def test_strings(self):
# Test that the PO parser correctly handles and unescape
# strings in the PO file.
# The PO file format allows for a variety of escape sequences,
# octal and hex escapes.
valid_strings = (
# empty strings
('""', ''),
('"" "" ""', ''),
# allowed escape sequences
(r'"\\"', '\\'),
(r'"\""', '"'),
(r'"\t"', '\t'),
(r'"\n"', '\n'),
(r'"\r"', '\r'),
(r'"\f"', '\f'),
(r'"\a"', '\a'),
(r'"\b"', '\b'),
(r'"\v"', '\v'),
# non-empty strings
('"foo"', 'foo'),
('"foo" "bar"', 'foobar'),
('"foo""bar"', 'foobar'),
('"" "foo" ""', 'foo'),
# newlines and tabs
(r'"foo\nbar"', 'foo\nbar'),
(r'"foo\n" "bar"', 'foo\nbar'),
(r'"foo\tbar"', 'foo\tbar'),
(r'"foo\t" "bar"', 'foo\tbar'),
# escaped quotes
(r'"foo\"bar"', 'foo"bar'),
(r'"foo\"" "bar"', 'foo"bar'),
(r'"foo\\" "bar"', 'foo\\bar'),
# octal escapes
(r'"\120\171\164\150\157\156"', 'Python'),
(r'"\120\171\164" "\150\157\156"', 'Python'),
(r'"\"\120\171\164" "\150\157\156\""', '"Python"'),
# hex escapes
(r'"\x50\x79\x74\x68\x6f\x6e"', 'Python'),
(r'"\x50\x79\x74" "\x68\x6f\x6e"', 'Python'),
(r'"\"\x50\x79\x74" "\x68\x6f\x6e\""', '"Python"'),
)

with temp_cwd():
for po_string, expected in valid_strings:
with self.subTest(po_string=po_string):
# Construct a PO file with a single entry,
# compile it, read it into a catalog and
# check the result.
po = f'msgid {po_string}\nmsgstr "translation"'
Path('messages.po').write_text(po)
# Reset the global MESSAGES dictionary
msgfmt.MESSAGES.clear()
msgfmt.make('messages.po', 'messages.mo')

with open('messages.mo', 'rb') as f:
actual = GNUTranslations(f)

self.assertDictEqual(actual._catalog, {expected: 'translation'})

invalid_strings = (
# "''", # invalid but currently accepted
'"',
'"""',
'"" "',
'foo',
'"" "foo',
'"foo" foo',
'42',
'"" 42 ""',
# disallowed escape sequences
# r'"\'"', # invalid but currently accepted
# r'"\e"', # invalid but currently accepted
# r'"\8"', # invalid but currently accepted
# r'"\9"', # invalid but currently accepted
r'"\x"',
r'"\u1234"',
r'"\N{ROMAN NUMERAL NINE}"'
)
with temp_cwd():
for invalid_string in invalid_strings:
with self.subTest(string=invalid_string):
po = f'msgid {invalid_string}\nmsgstr "translation"'
Path('messages.po').write_text(po)
# Reset the global MESSAGES dictionary
msgfmt.MESSAGES.clear()
with self.assertRaises(Exception):
msgfmt.make('messages.po', 'messages.mo')


class CLITest(unittest.TestCase):

def test_help(self):
for option in ('--help', '-h'):
res = assert_python_ok(msgfmt, option)
res = assert_python_ok(msgfmt_py, option)
err = res.err.decode('utf-8')
self.assertIn('Generate binary message catalog from textual translation description.', err)

def test_version(self):
for option in ('--version', '-V'):
res = assert_python_ok(msgfmt, option)
res = assert_python_ok(msgfmt_py, option)
out = res.out.decode('utf-8').strip()
self.assertEqual('msgfmt.py 1.2', out)

def test_invalid_option(self):
res = assert_python_failure(msgfmt, '--invalid-option')
res = assert_python_failure(msgfmt_py, '--invalid-option')
err = res.err.decode('utf-8')
self.assertIn('Generate binary message catalog from textual translation description.', err)
self.assertIn('option --invalid-option not recognized', err)

def test_no_input_file(self):
res = assert_python_ok(msgfmt)
res = assert_python_ok(msgfmt_py)
err = res.err.decode('utf-8').replace('\r\n', '\n')
self.assertIn('No input file given\n'
"Try `msgfmt --help' for more information.", err)

def test_nonexistent_file(self):
assert_python_failure(msgfmt, 'nonexistent.po')
assert_python_failure(msgfmt_py, 'nonexistent.po')


def update_catalog_snapshots():
Expand Down
Loading