Skip to content

Refactor codecs #5623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 7 additions & 4 deletions Lib/_pycodecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1086,11 +1086,13 @@ def charmapencode_output(c, mapping):
     rep = mapping[c]
     if isinstance(rep, int) or isinstance(rep, int):
         if rep < 256:
-            return rep
+            return [rep]
         else:
             raise TypeError("character mapping must be in range(256)")
     elif isinstance(rep, str):
-        return ord(rep)
+        return [ord(rep)]
+    elif isinstance(rep, bytes):
+        return rep
     elif rep == None:
         raise KeyError("character maps to <undefined>")
     else:
Expand All @@ -1113,12 +1115,13 @@ def PyUnicode_EncodeCharmap(p, size, mapping='latin-1', errors='strict'):
         #/* try to encode it */
         try:
             x = charmapencode_output(ord(p[inpos]), mapping)
-            res += [x]
+            res += x
         except KeyError:
             x = unicode_call_errorhandler(errors, "charmap",
             "character maps to <undefined>", p, inpos, inpos+1, False)
             try:
-                res += [charmapencode_output(ord(y), mapping) for y in x[0]]
+                for y in x[0]:
+                    res += charmapencode_output(ord(y), mapping)
             except KeyError:
                 raise UnicodeEncodeError("charmap", p, inpos, inpos+1,
                                         "character maps to <undefined>")
Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_charmapcodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ def test_constructorx(self):
self.assertEqual(str(b'dxf', codecname), 'dabcf')
self.assertEqual(str(b'dxfx', codecname), 'dabcfabc')

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_encodex(self):
self.assertEqual('abc'.encode(codecname), b'abc')
self.assertEqual('xdef'.encode(codecname), b'abcdef')
Expand Down
14 changes: 0 additions & 14 deletions Lib/test/test_codeccallbacks.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,6 @@ def relaxedutf8(exc):
self.assertRaises(UnicodeDecodeError, sin.decode,
"utf-8", "test.relaxedutf8")

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_charmapencode(self):
# For charmap encodings the replacement string will be
# mapped through the encoding again. This means, that
Expand Down Expand Up @@ -329,8 +327,6 @@ def check_exceptionobjectargs(self, exctype, args, msg):
exc = exctype(*args)
self.assertEqual(str(exc), msg)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_unicodeencodeerror(self):
self.check_exceptionobjectargs(
UnicodeEncodeError,
Expand Down Expand Up @@ -363,8 +359,6 @@ def test_unicodeencodeerror(self):
"'ascii' codec can't encode character '\\U00010000' in position 0: ouch"
)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_unicodedecodeerror(self):
self.check_exceptionobjectargs(
UnicodeDecodeError,
Expand All @@ -377,8 +371,6 @@ def test_unicodedecodeerror(self):
"'ascii' codec can't decode bytes in position 1-2: ouch"
)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_unicodetranslateerror(self):
self.check_exceptionobjectargs(
UnicodeTranslateError,
Expand Down Expand Up @@ -467,8 +459,6 @@ def test_badandgoodignoreexceptions(self):
("", 2)
)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_badandgoodreplaceexceptions(self):
# "replace" complains about a non-exception passed in
self.assertRaises(
Expand Down Expand Up @@ -509,8 +499,6 @@ def test_badandgoodreplaceexceptions(self):
("\ufffd", 2)
)

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_badandgoodxmlcharrefreplaceexceptions(self):
# "xmlcharrefreplace" complains about a non-exception passed in
self.assertRaises(
Expand Down Expand Up @@ -1017,8 +1005,6 @@ def __getitem__(self, key):
self.assertRaises(ValueError, codecs.charmap_decode, b"\xff", "strict", D())
self.assertRaises(TypeError, codecs.charmap_decode, b"\xff", "strict", {0xff: sys.maxunicode+1})

-    # TODO: RUSTPYTHON
-    @unittest.expectedFailure
def test_encodehelper(self):
# enhance coverage of:
# Objects/unicodeobject.c::unicode_encode_call_errorhandler()
Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -5525,8 +5525,6 @@ def test_encoding_errors_default(self):
self.assertEqual(data, r'\U0001f602: \u2603\ufe0f: The \xd8resund '
r'Bridge joins Copenhagen to Malm\xf6')

-    # TODO: RustPython
-    @unittest.expectedFailure
def test_encoding_errors_none(self):
# Specifying None should behave as 'strict'
try:
Expand Down
1 change: 1 addition & 0 deletions common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ num-traits = { workspace = true }
once_cell = { workspace = true }
parking_lot = { workspace = true, optional = true }
rand = { workspace = true }
+unicode_names2 = { workspace = true }

lock_api = "0.4"
radium = "0.7"
Expand Down
Loading
Loading