Skip to content

gh-133036: Deprecate codecs.open #133038

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Apr 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Doc/deprecations/pending-removal-in-future.rst
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ although there is currently no date scheduled for their removal.
:data:`calendar.FEBRUARY`.
(Contributed by Prince Roshan in :gh:`103636`.)

* :mod:`codecs`: use :func:`open` instead of :func:`codecs.open`. (:gh:`133036`)

* :attr:`codeobject.co_lnotab`: use the :meth:`codeobject.co_lines` method
instead.

Expand Down
4 changes: 4 additions & 0 deletions Doc/library/codecs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,10 @@ wider range of codecs when working with binary files:
.. versionchanged:: 3.11
The ``'U'`` mode has been removed.

.. deprecated:: next

:func:`codecs.open` has been superseded by :func:`open`.


.. function:: EncodedFile(file, data_encoding, file_encoding=None, errors='strict')

Expand Down
4 changes: 4 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1575,6 +1575,10 @@ Deprecated
as a single positional argument.
(Contributed by Serhiy Storchaka in :gh:`109218`.)

* :mod:`codecs`:
:func:`codecs.open` is now deprecated. Use :func:`open` instead.
(Contributed by Inada Naoki in :gh:`133036`.)

* :mod:`functools`:
Calling the Python implementation of :func:`functools.reduce` with *function*
or *sequence* as keyword arguments is now deprecated.
Expand Down
3 changes: 1 addition & 2 deletions Lib/_pyio.py
Original file line number Diff line number Diff line change
Expand Up @@ -2056,8 +2056,7 @@ def __init__(self, buffer, encoding=None, errors=None, newline=None,
raise ValueError("invalid encoding: %r" % encoding)

if not codecs.lookup(encoding)._is_text_encoding:
msg = ("%r is not a text encoding; "
"use codecs.open() to handle arbitrary codecs")
msg = "%r is not a text encoding"
raise LookupError(msg % encoding)

if errors is None:
Expand Down
6 changes: 4 additions & 2 deletions Lib/codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,7 +884,6 @@ def __reduce_ex__(self, proto):
### Shortcuts

def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):

""" Open an encoded file using the given mode and return
a wrapped version providing transparent encoding/decoding.

Expand Down Expand Up @@ -912,8 +911,11 @@ def open(filename, mode='r', encoding=None, errors='strict', buffering=-1):
.encoding which allows querying the used encoding. This
attribute is only available if an encoding was specified as
parameter.

"""
import warnings
warnings.warn("codecs.open() is deprecated. Use open() instead.",
DeprecationWarning, stacklevel=2)

if encoding is not None and \
'b' not in mode:
# Force opening of the file in binary mode
Expand Down
42 changes: 21 additions & 21 deletions Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import unittest
import encodings
from unittest import mock
import warnings

from test import support
from test.support import os_helper
Expand All @@ -20,13 +21,12 @@
except ImportError:
_testinternalcapi = None

try:
import ctypes
except ImportError:
ctypes = None
SIZEOF_WCHAR_T = -1
else:
SIZEOF_WCHAR_T = ctypes.sizeof(ctypes.c_wchar)

def codecs_open_no_warn(*args, **kwargs):
    """Call codecs.open(*args, **kwargs) ignoring DeprecationWarning.

    All positional and keyword arguments are forwarded unchanged to
    codecs.open(), and its return value is returned as-is.  Only
    DeprecationWarning is suppressed; warnings of any other category
    raised during the call are still reported according to the filters
    that are active in the caller.
    """
    with warnings.catch_warnings():
        # Restrict the filter to DeprecationWarning so that unrelated
        # warnings (e.g. RuntimeWarning, ResourceWarning) are not hidden.
        warnings.simplefilter("ignore", DeprecationWarning)
        return codecs.open(*args, **kwargs)

def coding_checker(self, coder):
def check(input, expect):
Expand All @@ -35,13 +35,13 @@ def check(input, expect):

# On small versions of Windows like Windows IoT or Windows Nano Server not all codepages are present
def is_code_page_present(cp):
from ctypes import POINTER, WINFUNCTYPE, WinDLL
from ctypes import POINTER, WINFUNCTYPE, WinDLL, Structure
from ctypes.wintypes import BOOL, BYTE, WCHAR, UINT, DWORD

MAX_LEADBYTES = 12 # 5 ranges, 2 bytes ea., 0 term.
MAX_DEFAULTCHAR = 2 # single or double byte
MAX_PATH = 260
class CPINFOEXW(ctypes.Structure):
class CPINFOEXW(Structure):
_fields_ = [("MaxCharSize", UINT),
("DefaultChar", BYTE*MAX_DEFAULTCHAR),
("LeadByte", BYTE*MAX_LEADBYTES),
Expand Down Expand Up @@ -719,19 +719,19 @@ def test_bug691291(self):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
with open(os_helper.TESTFN, 'wb') as fp:
fp.write(s)
with codecs.open(os_helper.TESTFN, 'r',
with codecs_open_no_warn(os_helper.TESTFN, 'r',
encoding=self.encoding) as reader:
self.assertEqual(reader.read(), s1)

def test_invalid_modes(self):
for mode in ('U', 'rU', 'r+U'):
with self.assertRaises(ValueError) as cm:
codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
codecs_open_no_warn(os_helper.TESTFN, mode, encoding=self.encoding)
self.assertIn('invalid mode', str(cm.exception))

for mode in ('rt', 'wt', 'at', 'r+t'):
with self.assertRaises(ValueError) as cm:
codecs.open(os_helper.TESTFN, mode, encoding=self.encoding)
codecs_open_no_warn(os_helper.TESTFN, mode, encoding=self.encoding)
self.assertIn("can't have text and binary mode at once",
str(cm.exception))

Expand Down Expand Up @@ -1844,9 +1844,9 @@ def test_all(self):
def test_open(self):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
for mode in ('w', 'r', 'r+', 'w+', 'a', 'a+'):
with self.subTest(mode), \
codecs.open(os_helper.TESTFN, mode, 'ascii') as file:
self.assertIsInstance(file, codecs.StreamReaderWriter)
with self.subTest(mode), self.assertWarns(DeprecationWarning):
with codecs.open(os_helper.TESTFN, mode, 'ascii') as file:
self.assertIsInstance(file, codecs.StreamReaderWriter)

def test_undefined(self):
self.assertRaises(UnicodeError, codecs.encode, 'abc', 'undefined')
Expand All @@ -1863,7 +1863,7 @@ def test_file_closes_if_lookup_error_raised(self):
mock_open = mock.mock_open()
with mock.patch('builtins.open', mock_open) as file:
with self.assertRaises(LookupError):
codecs.open(os_helper.TESTFN, 'wt', 'invalid-encoding')
codecs_open_no_warn(os_helper.TESTFN, 'wt', 'invalid-encoding')

file().close.assert_called()

Expand Down Expand Up @@ -2883,7 +2883,7 @@ def test_seek0(self):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
for encoding in tests:
# Check if the BOM is written only once
with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f:
f.write(data)
f.write(data)
f.seek(0)
Expand All @@ -2892,7 +2892,7 @@ def test_seek0(self):
self.assertEqual(f.read(), data * 2)

# Check that the BOM is written after a seek(0)
with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f:
f.write(data[0])
self.assertNotEqual(f.tell(), 0)
f.seek(0)
Expand All @@ -2901,7 +2901,7 @@ def test_seek0(self):
self.assertEqual(f.read(), data)

# (StreamWriter) Check that the BOM is written after a seek(0)
with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f:
f.writer.write(data[0])
self.assertNotEqual(f.writer.tell(), 0)
f.writer.seek(0)
Expand All @@ -2911,7 +2911,7 @@ def test_seek0(self):

# Check that the BOM is not written after a seek() at a position
# different than the start
with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f:
f.write(data)
f.seek(f.tell())
f.write(data)
Expand All @@ -2920,7 +2920,7 @@ def test_seek0(self):

# (StreamWriter) Check that the BOM is not written after a seek()
# at a position different than the start
with codecs.open(os_helper.TESTFN, 'w+', encoding=encoding) as f:
with codecs_open_no_warn(os_helper.TESTFN, 'w+', encoding=encoding) as f:
f.writer.write(data)
f.writer.seek(f.writer.tell())
f.writer.write(data)
Expand Down
3 changes: 2 additions & 1 deletion Lib/test/test_multibytecodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,8 @@ def test_bug1728403(self):
f.write(b'\xa1')
finally:
f.close()
f = codecs.open(TESTFN, encoding='cp949')
with self.assertWarns(DeprecationWarning):
f = codecs.open(TESTFN, encoding='cp949')
try:
self.assertRaises(UnicodeDecodeError, f.read, 2)
finally:
Expand Down
6 changes: 3 additions & 3 deletions Lib/test/test_sax.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# regression test for SAX 2.0
# $Id$

from xml.sax import make_parser, ContentHandler, \
SAXException, SAXReaderNotAvailable, SAXParseException
Expand Down Expand Up @@ -832,8 +831,9 @@ class StreamReaderWriterXmlgenTest(XmlgenTest, unittest.TestCase):
fname = os_helper.TESTFN + '-codecs'

def ioclass(self):
writer = codecs.open(self.fname, 'w', encoding='ascii',
errors='xmlcharrefreplace', buffering=0)
with self.assertWarns(DeprecationWarning):
writer = codecs.open(self.fname, 'w', encoding='ascii',
errors='xmlcharrefreplace', buffering=0)
def cleanup():
writer.close()
os_helper.unlink(self.fname)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:func:`codecs.open` is now deprecated. Use :func:`open` instead. Contributed
by Inada Naoki.
5 changes: 2 additions & 3 deletions Modules/_io/textio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1185,7 +1185,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
}

/* Check we have been asked for a real text encoding */
codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
codec_info = _PyCodec_LookupTextEncoding(encoding, NULL);
if (codec_info == NULL) {
Py_CLEAR(self->encoding);
goto error;
Expand Down Expand Up @@ -1324,8 +1324,7 @@ textiowrapper_change_encoding(textio *self, PyObject *encoding,
}

// Create new encoder & decoder
PyObject *codec_info = _PyCodec_LookupTextEncoding(
c_encoding, "codecs.open()");
PyObject *codec_info = _PyCodec_LookupTextEncoding(c_encoding, NULL);
if (codec_info == NULL) {
Py_DECREF(encoding);
Py_DECREF(errors);
Expand Down
18 changes: 13 additions & 5 deletions Python/codecs.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,11 +540,19 @@ PyObject * _PyCodec_LookupTextEncoding(const char *encoding,
Py_DECREF(attr);
if (is_text_codec <= 0) {
Py_DECREF(codec);
if (!is_text_codec)
PyErr_Format(PyExc_LookupError,
"'%.400s' is not a text encoding; "
"use %s to handle arbitrary codecs",
encoding, alternate_command);
if (!is_text_codec) {
if (alternate_command != NULL) {
PyErr_Format(PyExc_LookupError,
"'%.400s' is not a text encoding; "
"use %s to handle arbitrary codecs",
encoding, alternate_command);
}
else {
PyErr_Format(PyExc_LookupError,
"'%.400s' is not a text encoding",
encoding);
}
}
return NULL;
}
}
Expand Down
Loading