From a3c014b8e7d9b41fba4c790bc01247c556b16ca9 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 7 Apr 2020 17:29:03 +0900 Subject: [PATCH 01/50] Raise a warning when encoding is omitted The warning is raised only in dev mode. pathlib uses the new `io.text_encoding` helper function. Other libraries will follow. --- Lib/_pyio.py | 46 +++++++++++++++++++++++------- Lib/io.py | 2 +- Lib/pathlib.py | 4 +++ Modules/_io/_iomodule.c | 40 ++++++++++++++++++++++++++ Modules/_io/clinic/_iomodule.c.h | 49 ++++++++++++++++++++++++++++++++ Modules/_io/textio.c | 12 ++++++++ 6 files changed, 142 insertions(+), 11 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 4804ed27cd14d6..35c9705aa22e47 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -40,6 +40,28 @@ _CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE +def text_encoding(encoding, stacklevel=1): + """ + Helper function to choose the text encoding. + + When encoding is not None, just return it. + Otherwise, return the default text encoding ("locale" for now) + and raise a DeprecationWarning in dev mode. + + This function can be used in APIs having encoding=None option. + But please consider encoding="utf-8" for new APIs. + """ + if encoding is None: + if sys.flags.dev_mode: + import warnings + warnings.warn( + "'encoding' option is not specified. 
The default encoding " + "will be changed to 'utf-8' in the future", + DeprecationWarning, stacklevel + 2) + encoding = "locale" + return encoding + + def open(file, mode="r", buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None): @@ -248,6 +270,7 @@ def open(file, mode="r", buffering=-1, encoding=None, errors=None, result = buffer if binary: return result + encoding = text_encoding(encoding) text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) result = text text.mode = mode @@ -2004,19 +2027,22 @@ class TextIOWrapper(TextIOBase): def __init__(self, buffer, encoding=None, errors=None, newline=None, line_buffering=False, write_through=False): self._check_newline(newline) - if encoding is None: + encoding = text_encoding(encoding) + + if encoding == "locale": try: - encoding = os.device_encoding(buffer.fileno()) + encoding = os.device_encoding(buffer.fileno()) or "locale" except (AttributeError, UnsupportedOperation): pass - if encoding is None: - try: - import locale - except ImportError: - # Importing locale may fail if Python is being built - encoding = "ascii" - else: - encoding = locale.getpreferredencoding(False) + + if encoding == "locale": + try: + import locale + except ImportError: + # Importing locale may fail if Python is being built + encoding = "utf-8" + else: + encoding = locale.getpreferredencoding(False) if not isinstance(encoding, str): raise ValueError("invalid encoding: %r" % encoding) diff --git a/Lib/io.py b/Lib/io.py index fbce6efc010c07..01f1df80ded297 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -54,7 +54,7 @@ from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, open, open_code, FileIO, BytesIO, StringIO, BufferedReader, BufferedWriter, BufferedRWPair, BufferedRandom, - IncrementalNewlineDecoder, TextIOWrapper) + IncrementalNewlineDecoder, text_encoding, TextIOWrapper) OpenWrapper = _io.open # for compatibility with _pyio diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 
531a699a40df49..5c9284b331a328 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -1241,6 +1241,8 @@ def open(self, mode='r', buffering=-1, encoding=None, Open the file pointed by this path and return a file object, as the built-in open() function does. """ + if "b" not in mode: + encoding = io.text_encoding(encoding) return io.open(self, mode, buffering, encoding, errors, newline, opener=self._opener) @@ -1255,6 +1257,7 @@ def read_text(self, encoding=None, errors=None): """ Open the file in text mode, read it, and close the file. """ + encoding = io.text_encoding(encoding) with self.open(mode='r', encoding=encoding, errors=errors) as f: return f.read() @@ -1274,6 +1277,7 @@ def write_text(self, data, encoding=None, errors=None, newline=None): if not isinstance(data, str): raise TypeError('data must be str, not %s' % data.__class__.__name__) + encoding = io.text_encoding(encoding) with self.open(mode='w', encoding=encoding, errors=errors, newline=newline) as f: return f.write(data) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 9147648b243bed..ef0418ff9f0a7d 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -33,6 +33,7 @@ PyObject *_PyIO_str_fileno = NULL; PyObject *_PyIO_str_flush = NULL; PyObject *_PyIO_str_getstate = NULL; PyObject *_PyIO_str_isatty = NULL; +PyObject *_PyIO_str_locale = NULL; PyObject *_PyIO_str_newlines = NULL; PyObject *_PyIO_str_nl = NULL; PyObject *_PyIO_str_peek = NULL; @@ -504,6 +505,43 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, return NULL; } + +/*[clinic input] +_io.text_encoding + encoding: object = NULL + stacklevel: int = 1 + / + +Helper function to choose the text encoding. + +When encoding is not None, just return it. +Otherwise, return the default text encoding ("locale" for now) +and raise a DeprecationWarning in dev mode. + +This function can be used in APIs having encoding=None option. +But please consider encoding="utf-8" for new APIs. 
+[clinic start generated code]*/ + +static PyObject * +_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) +/*[clinic end generated code: output=91b2cfea6934cc0c input=46b896c6a7111a95]*/ +{ + if (encoding == NULL || encoding == Py_None) { + PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); + if (interp->config.dev_mode) { + PyErr_WarnEx(PyExc_DeprecationWarning, + "'encoding' option is not specified. The default encoding " + "will be changed to 'utf-8' in the future", + stacklevel + 1); + } + Py_INCREF(_PyIO_str_locale); + return _PyIO_str_locale; + } + Py_INCREF(encoding); + return encoding; +} + + /*[clinic input] _io.open_code @@ -629,6 +667,7 @@ iomodule_free(PyObject *mod) { static PyMethodDef module_methods[] = { _IO_OPEN_METHODDEF + _IO_TEXT_ENCODING_METHODDEF _IO_OPEN_CODE_METHODDEF {NULL, NULL} }; @@ -747,6 +786,7 @@ PyInit__io(void) ADD_INTERNED(flush) ADD_INTERNED(getstate) ADD_INTERNED(isatty) + ADD_INTERNED(locale) ADD_INTERNED(newlines) ADD_INTERNED(peek) ADD_INTERNED(read) diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index dc7b5ff243a784..f88b32b691e808 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -272,6 +272,55 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw return return_value; } +PyDoc_STRVAR(_io_text_encoding__doc__, +"text_encoding($module, encoding=, stacklevel=1, /)\n" +"--\n" +"\n" +"Select text encoding for TextIOWrapper.\n" +"\n" +"Returns the default text encoding for TextIOWrapper when encoding is None.\n" +"The default text encoding is \"locale\" for now, but it will be changed\n" +"to \"utf-8\" in the future."); + +#define _IO_TEXT_ENCODING_METHODDEF \ + {"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__}, + +static PyObject * +_io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel); + +static PyObject * 
+_io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + PyObject *encoding = NULL; + int stacklevel = 1; + + if (!_PyArg_CheckPositional("text_encoding", nargs, 0, 2)) { + goto exit; + } + if (nargs < 1) { + goto skip_optional; + } + encoding = args[0]; + if (nargs < 2) { + goto skip_optional; + } + if (PyFloat_Check(args[1])) { + PyErr_SetString(PyExc_TypeError, + "integer argument expected, got float" ); + goto exit; + } + stacklevel = _PyLong_AsInt(args[1]); + if (stacklevel == -1 && PyErr_Occurred()) { + goto exit; + } +skip_optional: + return_value = _io_text_encoding_impl(module, encoding, stacklevel); + +exit: + return return_value; +} + PyDoc_STRVAR(_io_open_code__doc__, "open_code($module, /, path)\n" "--\n" diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index f08d14e18b4021..7b9aace56bc544 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1123,6 +1123,18 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, self->encodefunc = NULL; self->b2cratio = 0.0; + if (encoding == NULL) { + PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); + if (interp->config.dev_mode) { + PyErr_WarnEx(PyExc_DeprecationWarning, + "'encoding' option is not specified. 
The default encoding " + "will be changed to 'utf-8' in the future", 1); + } + } + else if (strcmp(encoding, "locale") == 0) { + encoding = NULL; + } + if (encoding == NULL) { /* Try os.device_encoding(fileno) */ PyObject *fileno; From 050bd1bfed5fc475b3e09e79872cc555e824226d Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 12 Apr 2020 16:17:34 +0900 Subject: [PATCH 02/50] add test --- Lib/test/test_io.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index cc54d0ea0062f2..3325026467a04f 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4222,6 +4222,29 @@ def test_check_encoding_errors(self): proc = assert_python_failure('-X', 'dev', '-c', code) self.assertEqual(proc.rc, 10, proc) + def test_check_encoding_warning(self): + # PEP 597: Raise warning when encoding is not specified + # and dev mode is enabled. + mod = self.io.__name__ + filename = __file__ + code = textwrap.dedent(f'''\ + import sys + from {mod} import open, TextIOWrapper + import pathlib + + with open({filename!r}) as f: # line 5 + pass + + pathlib.Path({filename!r}).read_text() # line 8 + ''') + proc = assert_python_ok('-X', 'dev', '-c', code) + warnings = proc.err.splitlines() + self.assertEqual(len(warnings), 2) + self.assertTrue( + warnings[0].startswith(b":5: DeprecationWarning: ")) + self.assertTrue( + warnings[1].startswith(b":8: DeprecationWarning: ")) + class CMiscIOTest(MiscIOTest): io = io From 939f4a016c2997f09efbdc7fab5ce5ff3caa926f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 12 Apr 2020 16:43:15 +0900 Subject: [PATCH 03/50] wrap encoding=None with text_encoding. 
--- Lib/bz2.py | 1 + Lib/gzip.py | 1 + Lib/lzma.py | 1 + Lib/tempfile.py | 7 +++++++ 4 files changed, 10 insertions(+) diff --git a/Lib/bz2.py b/Lib/bz2.py index ce07ebeb142d92..1da3ce65c81b7d 100644 --- a/Lib/bz2.py +++ b/Lib/bz2.py @@ -311,6 +311,7 @@ def open(filename, mode="rb", compresslevel=9, binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel) if "t" in mode: + encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file diff --git a/Lib/gzip.py b/Lib/gzip.py index e422773b3edfb7..89a3633e6e8ace 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -62,6 +62,7 @@ def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST, raise TypeError("filename must be a str or bytes object, or a file") if "t" in mode: + encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file diff --git a/Lib/lzma.py b/Lib/lzma.py index 0817b872d2019f..c8b197055cddce 100644 --- a/Lib/lzma.py +++ b/Lib/lzma.py @@ -302,6 +302,7 @@ def open(filename, mode="rb", *, preset=preset, filters=filters) if "t" in mode: + encoding = io.text_encoding(encoding) return io.TextIOWrapper(binary_file, encoding, errors, newline) else: return binary_file diff --git a/Lib/tempfile.py b/Lib/tempfile.py index c3fe61aa0af4f7..6556ad4317ccc2 100644 --- a/Lib/tempfile.py +++ b/Lib/tempfile.py @@ -535,6 +535,9 @@ def NamedTemporaryFile(mode='w+b', buffering=-1, encoding=None, if _os.name == 'nt' and delete: flags |= _os.O_TEMPORARY + if "b" not in mode: + encoding = _io.text_encoding(encoding) + (fd, name) = _mkstemp_inner(dir, prefix, suffix, flags, output_type) try: file = _io.open(fd, mode, buffering=buffering, @@ -575,6 +578,9 @@ def TemporaryFile(mode='w+b', buffering=-1, encoding=None, """ global _O_TMPFILE_WORKS + if "b" not in mode: + encoding = _io.text_encoding(encoding) + prefix, suffix, dir, output_type = _sanitize_params(prefix, suffix, dir) flags = 
_bin_openflags @@ -630,6 +636,7 @@ def __init__(self, max_size=0, mode='w+b', buffering=-1, if 'b' in mode: self._file = _io.BytesIO() else: + encoding = _io.text_encoding(encoding) self._file = _io.TextIOWrapper(_io.BytesIO(), encoding=encoding, errors=errors, newline=newline) From 3c997779b936a07e93163d21addb164634224087 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 29 Jan 2021 17:46:52 +0900 Subject: [PATCH 04/50] Add io.LOCALE_ENCODING = "locale" --- Lib/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Lib/io.py b/Lib/io.py index 01f1df80ded297..7a2076e3f1b68a 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -66,6 +66,9 @@ SEEK_CUR = 1 SEEK_END = 2 +# PEP 597 +LOCALE_ENCODING = "locale" + # Declaring ABCs in C is tricky so we do it here. # Method descriptions and default implementations are inherited from the C # version however. From 40162780454dffdc69ca2931073f43584cb9a231 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 29 Jan 2021 17:49:22 +0900 Subject: [PATCH 05/50] Add EncodingWarning. Copied from https://github.com/python/cpython/pull/13938 --- Doc/library/exceptions.rst | 8 ++++++++ Include/pyerrors.h | 1 + Lib/_pyio.py | 4 ++-- Lib/test/test_io.py | 9 +++++++-- Modules/_io/_iomodule.c | 11 ++++++----- Modules/_io/clinic/_iomodule.c.h | 18 ++++++++---------- Modules/_io/textio.c | 4 ++-- Objects/exceptions.c | 9 +++++++++ 8 files changed, 43 insertions(+), 21 deletions(-) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 1028213699d637..366c5711ea0500 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -688,6 +688,14 @@ The following exceptions are used as warning categories; see the Base class for warnings generated by user code. +.. exception:: EncodingWarning + + Base class for warnings about encodings when those warnings are intended for + other Python developers. + + .. versionadded:: 3.10 + + .. 
exception:: DeprecationWarning Base class for warnings about deprecated features when those warnings are diff --git a/Include/pyerrors.h b/Include/pyerrors.h index 979a26ba68a033..e0b0ccba0f2a28 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -144,6 +144,7 @@ PyAPI_DATA(PyObject *) PyExc_WindowsError; /* Predefined warning categories */ PyAPI_DATA(PyObject *) PyExc_Warning; PyAPI_DATA(PyObject *) PyExc_UserWarning; +PyAPI_DATA(PyObject *) PyExc_EncodingWarning; PyAPI_DATA(PyObject *) PyExc_DeprecationWarning; PyAPI_DATA(PyObject *) PyExc_PendingDeprecationWarning; PyAPI_DATA(PyObject *) PyExc_SyntaxWarning; diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 35c9705aa22e47..6ffda5c4bc715e 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -46,7 +46,7 @@ def text_encoding(encoding, stacklevel=1): When encoding is not None, just return it. Otherwise, return the default text encoding ("locale" for now) - and raise a DeprecationWarning in dev mode. + and raise a EncodingWarning in dev mode. This function can be used in APIs having encoding=None option. But please consider encoding="utf-8" for new APIs. @@ -57,7 +57,7 @@ def text_encoding(encoding, stacklevel=1): warnings.warn( "'encoding' option is not specified. 
The default encoding " "will be changed to 'utf-8' in the future", - DeprecationWarning, stacklevel + 2) + EncodingWarning, stacklevel + 2) encoding = "locale" return encoding diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 3325026467a04f..1dc1d602086e64 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3697,6 +3697,11 @@ def test_issue25862(self): t.write('x') t.tell() + def test_encoding_warning(self): + with support.check_warnings(('encoding=None', EncodingWarning)): + t = self.TextIOWrapper(self.BytesIO(b'test'), encoding=None) + t.close() + class MemviewBytesIO(io.BytesIO): '''A BytesIO object whose read method returns memoryviews @@ -4241,9 +4246,9 @@ def test_check_encoding_warning(self): warnings = proc.err.splitlines() self.assertEqual(len(warnings), 2) self.assertTrue( - warnings[0].startswith(b":5: DeprecationWarning: ")) + warnings[0].startswith(b":5: EncodingWarning: ")) self.assertTrue( - warnings[1].startswith(b":8: DeprecationWarning: ")) + warnings[1].startswith(b":8: EncodingWarning: ")) class CMiscIOTest(MiscIOTest): diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index ef0418ff9f0a7d..5c0348100c84a0 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -10,6 +10,7 @@ #define PY_SSIZE_T_CLEAN #include "Python.h" #include "_iomodule.h" +#include "pycore_pystate.h" // _PyInterpreterState_GET() #ifdef HAVE_SYS_TYPES_H #include @@ -516,7 +517,7 @@ Helper function to choose the text encoding. When encoding is not None, just return it. Otherwise, return the default text encoding ("locale" for now) -and raise a DeprecationWarning in dev mode. +and raise a EncodingWarning in dev mode. This function can be used in APIs having encoding=None option. But please consider encoding="utf-8" for new APIs. @@ -524,12 +525,12 @@ But please consider encoding="utf-8" for new APIs. 
static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=46b896c6a7111a95]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=f6a73a49e0f1a2f4]*/ { if (encoding == NULL || encoding == Py_None) { - PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); - if (interp->config.dev_mode) { - PyErr_WarnEx(PyExc_DeprecationWarning, + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { + PyErr_WarnEx(PyExc_EncodingWarning, "'encoding' option is not specified. The default encoding " "will be changed to 'utf-8' in the future", stacklevel + 1); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index f88b32b691e808..a2dbaf57b3b9fb 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -276,11 +276,14 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "text_encoding($module, encoding=, stacklevel=1, /)\n" "--\n" "\n" -"Select text encoding for TextIOWrapper.\n" +"Helper function to choose the text encoding.\n" "\n" -"Returns the default text encoding for TextIOWrapper when encoding is None.\n" -"The default text encoding is \"locale\" for now, but it will be changed\n" -"to \"utf-8\" in the future."); +"When encoding is not None, just return it.\n" +"Otherwise, return the default text encoding (\"locale\" for now)\n" +"and raise a EncodingWarning in dev mode.\n" +"\n" +"This function can be used in APIs having encoding=None option.\n" +"But please consider encoding=\"utf-8\" for new APIs."); #define _IO_TEXT_ENCODING_METHODDEF \ {"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__}, @@ -305,11 +308,6 @@ _io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) if (nargs < 2) { goto skip_optional; } - if (PyFloat_Check(args[1])) { - PyErr_SetString(PyExc_TypeError, - "integer 
argument expected, got float" ); - goto exit; - } stacklevel = _PyLong_AsInt(args[1]); if (stacklevel == -1 && PyErr_Occurred()) { goto exit; @@ -362,4 +360,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=5c0dd7a262c30ebc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=24a1210fcb9d6a71 input=a9049054013a1b77]*/ diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 7b9aace56bc544..ca8b10bee7f927 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1124,8 +1124,8 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, self->b2cratio = 0.0; if (encoding == NULL) { - PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE(); - if (interp->config.dev_mode) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { PyErr_WarnEx(PyExc_DeprecationWarning, "'encoding' option is not specified. 
The default encoding " "will be changed to 'utf-8' in the future", 1); diff --git a/Objects/exceptions.c b/Objects/exceptions.c index d4824938a0f507..38f2bf15f40694 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2406,6 +2406,13 @@ SimpleExtendsException(PyExc_Warning, UserWarning, "Base class for warnings generated by user code."); +/* + * EncodingWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, EncodingWarning, + "Base class for warnings about encodings."); + + /* * DeprecationWarning extends Warning */ @@ -2594,6 +2601,7 @@ _PyExc_Init(PyThreadState *tstate) PRE_INIT(BufferError); PRE_INIT(Warning); PRE_INIT(UserWarning); + PRE_INIT(EncodingWarning); PRE_INIT(DeprecationWarning); PRE_INIT(PendingDeprecationWarning); PRE_INIT(SyntaxWarning); @@ -2733,6 +2741,7 @@ _PyBuiltins_AddExceptions(PyObject *bltinmod) POST_INIT(BufferError); POST_INIT(Warning); POST_INIT(UserWarning); + POST_INIT(EncodingWarning); POST_INIT(DeprecationWarning); POST_INIT(PendingDeprecationWarning); POST_INIT(SyntaxWarning); From c5c556cc3bff71b6392740f6c09e689ed99da2f8 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 29 Jan 2021 20:39:41 +0900 Subject: [PATCH 06/50] Add sys.warn_default_encoding --- Include/cpython/initconfig.h | 1 + Include/internal/pycore_initconfig.h | 4 +++- Lib/_pyio.py | 8 +++++--- Lib/test/test_io.py | 2 +- Lib/test/test_sys.py | 3 ++- Modules/_io/_iomodule.c | 12 ++++++------ Modules/_io/textio.c | 7 +++---- Objects/exceptions.c | 14 +++++++------- Python/initconfig.c | 14 +++++++++++++- Python/preconfig.c | 13 +++++++++++++ Python/sysmodule.c | 4 +++- 11 files changed, 57 insertions(+), 25 deletions(-) diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 666c1e419ca24d..56ec83e97ee75d 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -134,6 +134,7 @@ typedef struct PyConfig { int isolated; int use_environment; int dev_mode; + int warn_default_encoding; int 
install_signal_handlers; int use_hash_seed; unsigned long hash_seed; diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 28cd57030e2181..7099bee267b0e8 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -102,13 +102,15 @@ typedef struct { int isolated; /* -I option */ int use_environment; /* -E option */ int dev_mode; /* -X dev and PYTHONDEVMODE */ + int warn_default_encoding; /* -X warndefaultencoding and PYTHONWARNDEFAULTENCODING */ } _PyPreCmdline; #define _PyPreCmdline_INIT \ (_PyPreCmdline){ \ .use_environment = -1, \ .isolated = -1, \ - .dev_mode = -1} + .dev_mode = -1, \ + .warn_default_encoding = -1} /* Note: _PyPreCmdline_INIT sets other fields to 0/NULL */ extern void _PyPreCmdline_Clear(_PyPreCmdline *cmdline); diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 6ffda5c4bc715e..85c4b0c9f5dde2 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -45,14 +45,16 @@ def text_encoding(encoding, stacklevel=1): Helper function to choose the text encoding. When encoding is not None, just return it. - Otherwise, return the default text encoding ("locale" for now) - and raise a EncodingWarning in dev mode. + Otherwise, return the default text encoding (i.e. "locale"). + + This function emits EncodingWarning if *encoding* is None and + sys.flags.encoding_warning is true. This function can be used in APIs having encoding=None option. But please consider encoding="utf-8" for new APIs. """ if encoding is None: - if sys.flags.dev_mode: + if sys.flags.warn_default_encoding: import warnings warnings.warn( "'encoding' option is not specified. 
The default encoding " diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 1dc1d602086e64..2cb4216815b8c7 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4242,7 +4242,7 @@ def test_check_encoding_warning(self): pathlib.Path({filename!r}).read_text() # line 8 ''') - proc = assert_python_ok('-X', 'dev', '-c', code) + proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code) warnings = proc.err.splitlines() self.assertEqual(len(warnings), 2) self.assertTrue( diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index c4e053594800b2..35e6c088ea9c79 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -591,7 +591,8 @@ def test_sys_flags(self): "inspect", "interactive", "optimize", "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", - "hash_randomization", "isolated", "dev_mode", "utf8_mode") + "hash_randomization", "isolated", "dev_mode", "utf8_mode", + "warn_default_encoding") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 5c0348100c84a0..44768804f38def 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -516,8 +516,10 @@ _io.text_encoding Helper function to choose the text encoding. When encoding is not None, just return it. -Otherwise, return the default text encoding ("locale" for now) -and raise a EncodingWarning in dev mode. +Otherwise, return the default text encoding (i.e. "locale"). + +This function emits EncodingWarning if *encoding* is None and +sys.flags.encoding_warning is true. This function can be used in APIs having encoding=None option. But please consider encoding="utf-8" for new APIs. 
@@ -529,11 +531,9 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { + if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, - "'encoding' option is not specified. The default encoding " - "will be changed to 'utf-8' in the future", - stacklevel + 1); + "'encoding' option is omitted", stacklevel + 1); } Py_INCREF(_PyIO_str_locale); return _PyIO_str_locale; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index ca8b10bee7f927..24880598e83d4d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1125,10 +1125,9 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, if (encoding == NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (!_PyInterpreterState_GetConfig(interp)->dev_mode) { - PyErr_WarnEx(PyExc_DeprecationWarning, - "'encoding' option is not specified. 
The default encoding " - "will be changed to 'utf-8' in the future", 1); + if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { + PyErr_WarnEx(PyExc_EncodingWarning, + "'encoding' option is omitted", 1); } } else if (strcmp(encoding, "locale") == 0) { diff --git a/Objects/exceptions.c b/Objects/exceptions.c index 38f2bf15f40694..ab0ff06f2b067f 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2406,13 +2406,6 @@ SimpleExtendsException(PyExc_Warning, UserWarning, "Base class for warnings generated by user code."); -/* - * EncodingWarning extends Warning - */ -SimpleExtendsException(PyExc_Warning, EncodingWarning, - "Base class for warnings about encodings."); - - /* * DeprecationWarning extends Warning */ @@ -2428,6 +2421,13 @@ SimpleExtendsException(PyExc_Warning, PendingDeprecationWarning, "in the future."); +/* + * EncodingWarning extends Warning + */ +SimpleExtendsException(PyExc_DeprecationWarning, EncodingWarning, + "Base class for warnings about encodings."); + + /* * SyntaxWarning extends Warning */ diff --git a/Python/initconfig.c b/Python/initconfig.c index 62087fb4208dd4..c385423063f4d6 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -94,6 +94,7 @@ static const char usage_3[] = "\ otherwise activate automatically)\n\ -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ given directory instead of to the code tree\n\ + -X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -129,7 +130,8 @@ static const char usage_6[] = "PYTHONBREAKPOINT: if this variable is set to 0, it disables the default\n" " debugger. 
It can be set to the callable of your debugger of choice.\n" "PYTHONDEVMODE: enable the development mode.\n" -"PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n"; +"PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n" +"PYTHONWARNDEFAULTENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n"; #if defined(MS_WINDOWS) # define PYTHONHOMEHELP "\\python{major}{minor}" @@ -589,6 +591,7 @@ config_check_consistency(const PyConfig *config) assert(config->isolated >= 0); assert(config->use_environment >= 0); assert(config->dev_mode >= 0); + assert(config->warn_default_encoding >= 0); assert(config->install_signal_handlers >= 0); assert(config->use_hash_seed >= 0); assert(config->hash_seed <= MAX_HASH_SEED); @@ -690,6 +693,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->isolated = -1; config->use_environment = -1; config->dev_mode = -1; + config->warn_default_encoding = -1; config->install_signal_handlers = 1; config->use_hash_seed = -1; config->faulthandler = -1; @@ -765,6 +769,7 @@ PyConfig_InitIsolatedConfig(PyConfig *config) config->use_environment = 0; config->user_site_directory = 0; config->dev_mode = 0; + config->warn_default_encoding = 0; config->install_signal_handlers = 0; config->use_hash_seed = 0; config->faulthandler = 0; @@ -873,6 +878,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(isolated); COPY_ATTR(use_environment); COPY_ATTR(dev_mode); + COPY_ATTR(warn_default_encoding); COPY_ATTR(install_signal_handlers); COPY_ATTR(use_hash_seed); COPY_ATTR(hash_seed); @@ -977,6 +983,7 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_INT(isolated); SET_ITEM_INT(use_environment); SET_ITEM_INT(dev_mode); + SET_ITEM_INT(warn_default_encoding); SET_ITEM_INT(install_signal_handlers); SET_ITEM_INT(use_hash_seed); SET_ITEM_UINT(hash_seed); @@ -1249,6 +1256,7 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict) GET_UINT(isolated); GET_UINT(use_environment); GET_UINT(dev_mode); + 
GET_UINT(warn_default_encoding); GET_UINT(install_signal_handlers); GET_UINT(use_hash_seed); if (config_dict_get_ulong(dict, "hash_seed", &config->hash_seed) < 0) { @@ -2136,6 +2144,10 @@ config_read(PyConfig *config, int compute_path_config) config->parse_argv = 2; } + if (config->warn_default_encoding < 0) { + config->warn_default_encoding = 0; + } + return _PyStatus_OK(); } diff --git a/Python/preconfig.c b/Python/preconfig.c index b8b0c3a0775ca8..561f5354b9bf34 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -169,6 +169,7 @@ _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config) COPY_ATTR(isolated); COPY_ATTR(use_environment); COPY_ATTR(dev_mode); + COPY_ATTR(warn_default_encoding); return _PyStatus_OK(); #undef COPY_ATTR @@ -257,9 +258,21 @@ _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig) cmdline->dev_mode = 0; } + // warn_default_encoding + if ((cmdline->warn_default_encoding < 0) + && (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding") + || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))) + { + cmdline->warn_default_encoding = 1; + } + if (cmdline->warn_default_encoding < 0) { + cmdline->warn_default_encoding = 0; + } + assert(cmdline->use_environment >= 0); assert(cmdline->isolated >= 0); assert(cmdline->dev_mode >= 0); + assert(cmdline->warn_default_encoding >= 0); return _PyStatus_OK(); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b9349effe3c87b..c8c8c2f85cba49 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2506,6 +2506,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, + {"warn_default_encoding", "-X warndefaultencoding"}, {0} }; @@ -2513,7 +2514,7 @@ static PyStructSequence_Desc flags_desc = { "sys.flags", /* name */ flags__doc__, /* doc */ flags_fields, /* fields */ - 15 + 16 }; static int @@ -2552,6 +2553,7 @@ set_flags_from_config(PyObject *flags, PyThreadState 
*tstate) SetFlag(config->use_hash_seed == 0 || config->hash_seed != 0); SetFlag(config->isolated); SetFlagObj(PyBool_FromLong(config->dev_mode)); + SetFlagObj(PyBool_FromLong(config->warn_default_encoding)); SetFlag(preconfig->utf8_mode); #undef SetFlagObj #undef SetFlag From d9a08c2f86cf915bb0dda457a83c91df15184e21 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 30 Jan 2021 18:04:46 +0900 Subject: [PATCH 07/50] shorten option names --- Include/cpython/initconfig.h | 2 +- Include/internal/pycore_initconfig.h | 4 ++-- Lib/_pyio.py | 2 +- Lib/test/test_io.py | 2 +- Lib/test/test_sys.py | 2 +- Modules/_io/_iomodule.c | 2 +- Modules/_io/textio.c | 2 +- Python/initconfig.c | 20 ++++++++++---------- Python/preconfig.c | 18 +++++++++--------- Python/sysmodule.c | 4 ++-- 10 files changed, 29 insertions(+), 29 deletions(-) diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 56ec83e97ee75d..cc108e1a66f8d5 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -134,7 +134,6 @@ typedef struct PyConfig { int isolated; int use_environment; int dev_mode; - int warn_default_encoding; int install_signal_handlers; int use_hash_seed; unsigned long hash_seed; @@ -154,6 +153,7 @@ typedef struct PyConfig { PyWideStringList warnoptions; int site_import; int bytes_warning; + int encoding_warning; int inspect; int interactive; int optimization_level; diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 7099bee267b0e8..046b8cfce24647 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -102,7 +102,7 @@ typedef struct { int isolated; /* -I option */ int use_environment; /* -E option */ int dev_mode; /* -X dev and PYTHONDEVMODE */ - int warn_default_encoding; /* -X warndefaultencoding and PYTHONWARNDEFAULTENCODING */ + int encoding_warning; /* -X warn_encoding and PYTHONWARNENCODING */ } _PyPreCmdline; #define _PyPreCmdline_INIT \ @@ -110,7 +110,7 @@ 
typedef struct { .use_environment = -1, \ .isolated = -1, \ .dev_mode = -1, \ - .warn_default_encoding = -1} + .encoding_warning = -1} /* Note: _PyPreCmdline_INIT sets other fields to 0/NULL */ extern void _PyPreCmdline_Clear(_PyPreCmdline *cmdline); diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 85c4b0c9f5dde2..521cf795892eaa 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -54,7 +54,7 @@ def text_encoding(encoding, stacklevel=1): But please consider encoding="utf-8" for new APIs. """ if encoding is None: - if sys.flags.warn_default_encoding: + if sys.flags.encoding_warning: import warnings warnings.warn( "'encoding' option is not specified. The default encoding " diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 2cb4216815b8c7..35527d11f7faff 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4242,7 +4242,7 @@ def test_check_encoding_warning(self): pathlib.Path({filename!r}).read_text() # line 8 ''') - proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code) + proc = assert_python_ok('-X', 'encoding_warning', '-c', code) warnings = proc.err.splitlines() self.assertEqual(len(warnings), 2) self.assertTrue( diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 35e6c088ea9c79..a9d5ceaeb5cf43 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -592,7 +592,7 @@ def test_sys_flags(self): "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", "hash_randomization", "isolated", "dev_mode", "utf8_mode", - "warn_default_encoding") + "encoding_warning") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 44768804f38def..79e0e9b92c0924 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -531,7 +531,7 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) { if (encoding == NULL || encoding == Py_None) 
{ PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { + if (_PyInterpreterState_GetConfig(interp)->encoding_warning) { PyErr_WarnEx(PyExc_EncodingWarning, "'encoding' option is omitted", stacklevel + 1); } diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 24880598e83d4d..f071bd59fb184e 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1125,7 +1125,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, if (encoding == NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { + if (_PyInterpreterState_GetConfig(interp)->encoding_warning) { PyErr_WarnEx(PyExc_EncodingWarning, "'encoding' option is omitted", 1); } diff --git a/Python/initconfig.c b/Python/initconfig.c index c385423063f4d6..35c61edd844266 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -94,7 +94,7 @@ static const char usage_3[] = "\ otherwise activate automatically)\n\ -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ given directory instead of to the code tree\n\ - -X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\ + -X warn_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -131,7 +131,7 @@ static const char usage_6[] = " debugger. 
It can be set to the callable of your debugger of choice.\n" "PYTHONDEVMODE: enable the development mode.\n" "PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n" -"PYTHONWARNDEFAULTENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n"; +"PYTHONWARNENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n"; #if defined(MS_WINDOWS) # define PYTHONHOMEHELP "\\python{major}{minor}" @@ -591,7 +591,6 @@ config_check_consistency(const PyConfig *config) assert(config->isolated >= 0); assert(config->use_environment >= 0); assert(config->dev_mode >= 0); - assert(config->warn_default_encoding >= 0); assert(config->install_signal_handlers >= 0); assert(config->use_hash_seed >= 0); assert(config->hash_seed <= MAX_HASH_SEED); @@ -603,6 +602,7 @@ config_check_consistency(const PyConfig *config) assert(config->malloc_stats >= 0); assert(config->site_import >= 0); assert(config->bytes_warning >= 0); + assert(config->encoding_warning >= 0); assert(config->inspect >= 0); assert(config->interactive >= 0); assert(config->optimization_level >= 0); @@ -693,7 +693,6 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->isolated = -1; config->use_environment = -1; config->dev_mode = -1; - config->warn_default_encoding = -1; config->install_signal_handlers = 1; config->use_hash_seed = -1; config->faulthandler = -1; @@ -702,6 +701,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->parse_argv = 0; config->site_import = -1; config->bytes_warning = -1; + config->encoding_warning = -1; config->inspect = -1; config->interactive = -1; config->optimization_level = -1; @@ -732,6 +732,7 @@ config_init_defaults(PyConfig *config) config->use_environment = 1; config->site_import = 1; config->bytes_warning = 0; + config->encoding_warning = 0; config->inspect = 0; config->interactive = 0; config->optimization_level = 0; @@ -769,7 +770,6 @@ PyConfig_InitIsolatedConfig(PyConfig *config) config->use_environment = 0; config->user_site_directory = 0; config->dev_mode 
= 0; - config->warn_default_encoding = 0; config->install_signal_handlers = 0; config->use_hash_seed = 0; config->faulthandler = 0; @@ -878,7 +878,6 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(isolated); COPY_ATTR(use_environment); COPY_ATTR(dev_mode); - COPY_ATTR(warn_default_encoding); COPY_ATTR(install_signal_handlers); COPY_ATTR(use_hash_seed); COPY_ATTR(hash_seed); @@ -912,6 +911,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(site_import); COPY_ATTR(bytes_warning); + COPY_ATTR(encoding_warning); COPY_ATTR(inspect); COPY_ATTR(interactive); COPY_ATTR(optimization_level); @@ -983,7 +983,6 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_INT(isolated); SET_ITEM_INT(use_environment); SET_ITEM_INT(dev_mode); - SET_ITEM_INT(warn_default_encoding); SET_ITEM_INT(install_signal_handlers); SET_ITEM_INT(use_hash_seed); SET_ITEM_UINT(hash_seed); @@ -1014,6 +1013,7 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_WSTR(platlibdir); SET_ITEM_INT(site_import); SET_ITEM_INT(bytes_warning); + SET_ITEM_INT(encoding_warning); SET_ITEM_INT(inspect); SET_ITEM_INT(interactive); SET_ITEM_INT(optimization_level); @@ -1256,7 +1256,6 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict) GET_UINT(isolated); GET_UINT(use_environment); GET_UINT(dev_mode); - GET_UINT(warn_default_encoding); GET_UINT(install_signal_handlers); GET_UINT(use_hash_seed); if (config_dict_get_ulong(dict, "hash_seed", &config->hash_seed) < 0) { @@ -1279,6 +1278,7 @@ _PyConfig_FromDict(PyConfig *config, PyObject *dict) GET_WSTRLIST(warnoptions); GET_UINT(site_import); GET_UINT(bytes_warning); + GET_UINT(encoding_warning); GET_UINT(inspect); GET_UINT(interactive); GET_UINT(optimization_level); @@ -2144,8 +2144,8 @@ config_read(PyConfig *config, int compute_path_config) config->parse_argv = 2; } - if (config->warn_default_encoding < 0) { - config->warn_default_encoding = 0; + if (config->encoding_warning < 0) { + config->encoding_warning = 0; } return 
_PyStatus_OK(); diff --git a/Python/preconfig.c b/Python/preconfig.c index 561f5354b9bf34..c733753e73244c 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -169,7 +169,7 @@ _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config) COPY_ATTR(isolated); COPY_ATTR(use_environment); COPY_ATTR(dev_mode); - COPY_ATTR(warn_default_encoding); + COPY_ATTR(encoding_warning); return _PyStatus_OK(); #undef COPY_ATTR @@ -258,21 +258,21 @@ _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig) cmdline->dev_mode = 0; } - // warn_default_encoding - if ((cmdline->warn_default_encoding < 0) - && (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding") - || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))) + // encoding_warning + if ((cmdline->encoding_warning < 0) + && (_Py_get_xoption(&cmdline->xoptions, L"warn_encoding") + || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNENCODING"))) { - cmdline->warn_default_encoding = 1; + cmdline->encoding_warning = 1; } - if (cmdline->warn_default_encoding < 0) { - cmdline->warn_default_encoding = 0; + if (cmdline->encoding_warning < 0) { + cmdline->encoding_warning = 0; } assert(cmdline->use_environment >= 0); assert(cmdline->isolated >= 0); assert(cmdline->dev_mode >= 0); - assert(cmdline->warn_default_encoding >= 0); + assert(cmdline->encoding_warning >= 0); return _PyStatus_OK(); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c8c8c2f85cba49..4dc2f801f12f08 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2506,7 +2506,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, - {"warn_default_encoding", "-X warndefaultencoding"}, + {"encoding_warning", "-X warn_encoding"}, {0} }; @@ -2553,8 +2553,8 @@ set_flags_from_config(PyObject *flags, PyThreadState *tstate) SetFlag(config->use_hash_seed == 0 || config->hash_seed != 0); SetFlag(config->isolated); 
SetFlagObj(PyBool_FromLong(config->dev_mode)); - SetFlagObj(PyBool_FromLong(config->warn_default_encoding)); SetFlag(preconfig->utf8_mode); + SetFlagObj(PyBool_FromLong(config->encoding_warning)); #undef SetFlagObj #undef SetFlag return 0; From 772648eeb00bc2e033d15bcede241363f4af3a20 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 30 Jan 2021 18:23:22 +0900 Subject: [PATCH 08/50] EncodingWarning extends Warning --- Objects/exceptions.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Objects/exceptions.c b/Objects/exceptions.c index ab0ff06f2b067f..bb96e5b304e690 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2421,13 +2421,6 @@ SimpleExtendsException(PyExc_Warning, PendingDeprecationWarning, "in the future."); -/* - * EncodingWarning extends Warning - */ -SimpleExtendsException(PyExc_DeprecationWarning, EncodingWarning, - "Base class for warnings about encodings."); - - /* * SyntaxWarning extends Warning */ @@ -2473,6 +2466,13 @@ SimpleExtendsException(PyExc_Warning, BytesWarning, "related to conversion from str or comparing to str."); +/* + * EncodingWarning extends Warning + */ +SimpleExtendsException(PyExc_Warning, EncodingWarning, + "Base class for warnings about encodings."); + + /* * ResourceWarning extends Warning */ From 1a8e30592bc9535ebc3465ad4b037c92967cc39b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 30 Jan 2021 18:33:17 +0900 Subject: [PATCH 09/50] make clinic --- Modules/_io/_iomodule.c | 2 +- Modules/_io/clinic/_iomodule.c.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 79e0e9b92c0924..50235b06eecb64 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -527,7 +527,7 @@ But please consider encoding="utf-8" for new APIs. 
static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=f6a73a49e0f1a2f4]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=fd956a85976afd07]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index a2dbaf57b3b9fb..1c626e724e6de7 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -279,8 +279,10 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "Helper function to choose the text encoding.\n" "\n" "When encoding is not None, just return it.\n" -"Otherwise, return the default text encoding (\"locale\" for now)\n" -"and raise a EncodingWarning in dev mode.\n" +"Otherwise, return the default text encoding (i.e. \"locale\").\n" +"\n" +"This function emits EncodingWarning if *encoding* is None and\n" +"sys.flags.encoding_warning is true.\n" "\n" "This function can be used in APIs having encoding=None option.\n" "But please consider encoding=\"utf-8\" for new APIs."); @@ -360,4 +362,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=24a1210fcb9d6a71 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f8081ebe49134854 input=a9049054013a1b77]*/ From 20966cd824f3ad09628ffd4ed0f0b20b2d2ed4fb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 30 Jan 2021 20:39:58 +0900 Subject: [PATCH 10/50] fix test --- Include/pyerrors.h | 2 +- Lib/test/test_io.py | 4 ++-- Objects/exceptions.c | 2 +- PC/python3dll.c | 1 + 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Include/pyerrors.h b/Include/pyerrors.h index e0b0ccba0f2a28..0db5c65c26a30d 100644 --- a/Include/pyerrors.h +++ b/Include/pyerrors.h @@ -144,7 +144,6 @@ PyAPI_DATA(PyObject *) PyExc_WindowsError; /* Predefined warning 
categories */ PyAPI_DATA(PyObject *) PyExc_Warning; PyAPI_DATA(PyObject *) PyExc_UserWarning; -PyAPI_DATA(PyObject *) PyExc_EncodingWarning; PyAPI_DATA(PyObject *) PyExc_DeprecationWarning; PyAPI_DATA(PyObject *) PyExc_PendingDeprecationWarning; PyAPI_DATA(PyObject *) PyExc_SyntaxWarning; @@ -153,6 +152,7 @@ PyAPI_DATA(PyObject *) PyExc_FutureWarning; PyAPI_DATA(PyObject *) PyExc_ImportWarning; PyAPI_DATA(PyObject *) PyExc_UnicodeWarning; PyAPI_DATA(PyObject *) PyExc_BytesWarning; +PyAPI_DATA(PyObject *) PyExc_EncodingWarning; PyAPI_DATA(PyObject *) PyExc_ResourceWarning; diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 35527d11f7faff..4dbb3f5f97b6dc 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3698,7 +3698,7 @@ def test_issue25862(self): t.tell() def test_encoding_warning(self): - with support.check_warnings(('encoding=None', EncodingWarning)): + with warnings_helper.check_warnings(('encoding=None', EncodingWarning)): t = self.TextIOWrapper(self.BytesIO(b'test'), encoding=None) t.close() @@ -4242,7 +4242,7 @@ def test_check_encoding_warning(self): pathlib.Path({filename!r}).read_text() # line 8 ''') - proc = assert_python_ok('-X', 'encoding_warning', '-c', code) + proc = assert_python_ok('-X', 'warn_encoding', '-c', code) warnings = proc.err.splitlines() self.assertEqual(len(warnings), 2) self.assertTrue( diff --git a/Objects/exceptions.c b/Objects/exceptions.c index bb96e5b304e690..8ecb0c19abd1b6 100644 --- a/Objects/exceptions.c +++ b/Objects/exceptions.c @@ -2470,7 +2470,7 @@ SimpleExtendsException(PyExc_Warning, BytesWarning, * EncodingWarning extends Warning */ SimpleExtendsException(PyExc_Warning, EncodingWarning, - "Base class for warnings about encodings."); + "Base class for warnings about encodings."); /* diff --git a/PC/python3dll.c b/PC/python3dll.c index 542853abc894d5..7d38b4ff832dd0 100644 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -723,6 +723,7 @@ EXPORT_DATA(PyExc_BlockingIOError) 
EXPORT_DATA(PyExc_BrokenPipeError) EXPORT_DATA(PyExc_BufferError) EXPORT_DATA(PyExc_BytesWarning) +EXPORT_DATA(PyExc_EncodingWarning) EXPORT_DATA(PyExc_ChildProcessError) EXPORT_DATA(PyExc_ConnectionAbortedError) EXPORT_DATA(PyExc_ConnectionError) From 2b80f42cae850a730938bc82ec65c7fa935c83a2 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 30 Jan 2021 21:06:57 +0900 Subject: [PATCH 11/50] remove wrong test case --- Lib/test/test_io.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 4dbb3f5f97b6dc..c5a3b6bf49ff70 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -3697,11 +3697,6 @@ def test_issue25862(self): t.write('x') t.tell() - def test_encoding_warning(self): - with warnings_helper.check_warnings(('encoding=None', EncodingWarning)): - t = self.TextIOWrapper(self.BytesIO(b'test'), encoding=None) - t.close() - class MemviewBytesIO(io.BytesIO): '''A BytesIO object whose read method returns memoryviews From 760308cfd2b0508df8ce826fbab1c6328e63f4fa Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 30 Jan 2021 21:09:26 +0900 Subject: [PATCH 12/50] fix exception_hierarchy.txt --- Lib/test/exception_hierarchy.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/exception_hierarchy.txt b/Lib/test/exception_hierarchy.txt index 763a6c899b48eb..6c5e82139105bf 100644 --- a/Lib/test/exception_hierarchy.txt +++ b/Lib/test/exception_hierarchy.txt @@ -61,4 +61,5 @@ BaseException +-- ImportWarning +-- UnicodeWarning +-- BytesWarning + +-- EncodingWarning +-- ResourceWarning From a95dff28be2ba893731a597cf4f7a5f61c97691f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 31 Jan 2021 09:42:40 +0900 Subject: [PATCH 13/50] Make sys.flags.encoding_warning int --- Python/sysmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 4dc2f801f12f08..b8c141868c05f9 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2554,7 +2554,7 
@@ set_flags_from_config(PyObject *flags, PyThreadState *tstate) SetFlag(config->isolated); SetFlagObj(PyBool_FromLong(config->dev_mode)); SetFlag(preconfig->utf8_mode); - SetFlagObj(PyBool_FromLong(config->encoding_warning)); + SetFlag(config->encoding_warning); #undef SetFlagObj #undef SetFlag return 0; From 31fb411aea102eb23702b7eb958f05176d20cdeb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 31 Jan 2021 09:43:00 +0900 Subject: [PATCH 14/50] Fix text_embed. --- Lib/test/test_embed.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 6833b2540d67d7..32f43653b05b27 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -389,6 +389,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'site_import': 1, 'bytes_warning': 0, + 'encoding_warning': 0, 'inspect': 0, 'interactive': 0, 'optimization_level': 0, From 096a0a3eb415993da11b9149dddeabaf530b5a88 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sun, 31 Jan 2021 10:46:47 +0900 Subject: [PATCH 15/50] Fix test_pickle --- Lib/test/test_pickle.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_pickle.py b/Lib/test/test_pickle.py index 1f5cb103933e00..23c7bd261e85ca 100644 --- a/Lib/test/test_pickle.py +++ b/Lib/test/test_pickle.py @@ -483,7 +483,8 @@ def test_exceptions(self): if exc in (BlockingIOError, ResourceWarning, StopAsyncIteration, - RecursionError): + RecursionError, + EncodingWarning): continue if exc is not OSError and issubclass(exc, OSError): self.assertEqual(reverse_mapping('builtins', name), From 99fc9381e234a7e56fff4d5535b7ce71453990e4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Sat, 13 Feb 2021 23:26:44 +0900 Subject: [PATCH 16/50] configparser: use io.text_encoding() --- Lib/configparser.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/configparser.py b/Lib/configparser.py index 924cc56a3f150d..3b4cb5e6b2407f 100644 --- a/Lib/configparser.py +++ b/Lib/configparser.py 
@@ -690,6 +690,7 @@ def read(self, filenames, encoding=None): """ if isinstance(filenames, (str, bytes, os.PathLike)): filenames = [filenames] + encoding = io.text_encoding(encoding) read_ok = [] for filename in filenames: try: From 6fdbcbc842eebedcf3acd60f2bf3d02c647b2a3e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Mon, 22 Feb 2021 14:45:40 +0900 Subject: [PATCH 17/50] Rename option names sys.flags.encoding_warning -> warn_default_encoding -X warn_encoding -> warn_default_encoding PYTHONWARNENCODING -> PYTHONWARNDEFAULTENCODING --- Doc/c-api/init_config.rst | 7 +++++++ Include/cpython/initconfig.h | 2 +- Include/internal/pycore_initconfig.h | 4 ++-- Lib/_pyio.py | 10 ++++------ Lib/test/test_embed.py | 2 +- Lib/test/test_io.py | 2 +- Lib/test/test_sys.py | 2 +- Modules/_io/_iomodule.c | 6 +++--- Modules/_io/clinic/_iomodule.c.h | 4 ++-- Modules/_io/textio.c | 2 +- Python/initconfig.c | 20 ++++++++++---------- Python/preconfig.c | 18 +++++++++--------- Python/sysmodule.c | 4 ++-- 13 files changed, 44 insertions(+), 39 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index db7c1f43765785..5d13e072a87226 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -583,6 +583,13 @@ PyConfig Default: ``0``. + .. c:member:: int warn_default_encoding + + If equals to 1, emit a ``EncodingWarning`` when ``TextIOWrapper`` + used its default encoding. See :pep:`597` for detail. + + .. versionadded:: 3.10 + .. 
c:member:: wchar_t* check_hash_pycs_mode Control the validation behavior of hash-based ``.pyc`` files: diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index cc108e1a66f8d5..09f9a2947efef3 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -153,7 +153,7 @@ typedef struct PyConfig { PyWideStringList warnoptions; int site_import; int bytes_warning; - int encoding_warning; + int warn_default_encoding; int inspect; int interactive; int optimization_level; diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 046b8cfce24647..9bc0df5b019fa0 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -102,7 +102,7 @@ typedef struct { int isolated; /* -I option */ int use_environment; /* -E option */ int dev_mode; /* -X dev and PYTHONDEVMODE */ - int encoding_warning; /* -X warn_encoding and PYTHONWARNENCODING */ + int warn_default_encoding; /* -X warn_default_encoding and PYTHONWARNDEFAULTENCODING */ } _PyPreCmdline; #define _PyPreCmdline_INIT \ @@ -110,7 +110,7 @@ typedef struct { .use_environment = -1, \ .isolated = -1, \ .dev_mode = -1, \ - .encoding_warning = -1} + .warn_default_encoding = -1} /* Note: _PyPreCmdline_INIT sets other fields to 0/NULL */ extern void _PyPreCmdline_Clear(_PyPreCmdline *cmdline); diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 521cf795892eaa..33ae72634f9717 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -48,18 +48,16 @@ def text_encoding(encoding, stacklevel=1): Otherwise, return the default text encoding (i.e. "locale"). This function emits EncodingWarning if *encoding* is None and - sys.flags.encoding_warning is true. + sys.flags.warn_default_encoding is true. This function can be used in APIs having encoding=None option. But please consider encoding="utf-8" for new APIs. 
""" if encoding is None: - if sys.flags.encoding_warning: + if sys.flags.warn_default_encoding: import warnings - warnings.warn( - "'encoding' option is not specified. The default encoding " - "will be changed to 'utf-8' in the future", - EncodingWarning, stacklevel + 2) + warnings.warn("'encoding' option is not specified.", + EncodingWarning, stacklevel + 2) encoding = "locale" return encoding diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 32f43653b05b27..646cd0632edd8c 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -389,7 +389,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'site_import': 1, 'bytes_warning': 0, - 'encoding_warning': 0, + 'warn_default_encoding': 0, 'inspect': 0, 'interactive': 0, 'optimization_level': 0, diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index c5a3b6bf49ff70..a777defe1d8bb6 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4237,7 +4237,7 @@ def test_check_encoding_warning(self): pathlib.Path({filename!r}).read_text() # line 8 ''') - proc = assert_python_ok('-X', 'warn_encoding', '-c', code) + proc = assert_python_ok('-X', 'warn_default_encoding', '-c', code) warnings = proc.err.splitlines() self.assertEqual(len(warnings), 2) self.assertTrue( diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index a9d5ceaeb5cf43..35e6c088ea9c79 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -592,7 +592,7 @@ def test_sys_flags(self): "dont_write_bytecode", "no_user_site", "no_site", "ignore_environment", "verbose", "bytes_warning", "quiet", "hash_randomization", "isolated", "dev_mode", "utf8_mode", - "encoding_warning") + "warn_default_encoding") for attr in attrs: self.assertTrue(hasattr(sys.flags, attr), attr) attr_type = bool if attr == "dev_mode" else int diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 50235b06eecb64..e7d5cac2be6c14 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -519,7 +519,7 @@ When 
encoding is not None, just return it. Otherwise, return the default text encoding (i.e. "locale"). This function emits EncodingWarning if *encoding* is None and -sys.flags.encoding_warning is true. +sys.flags.warn_default_encoding is true. This function can be used in APIs having encoding=None option. But please consider encoding="utf-8" for new APIs. @@ -527,11 +527,11 @@ But please consider encoding="utf-8" for new APIs. static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=fd956a85976afd07]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=31cd163789637f39]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyInterpreterState_GetConfig(interp)->encoding_warning) { + if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, "'encoding' option is omitted", stacklevel + 1); } diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 1c626e724e6de7..ac72e8f94d9cd9 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -282,7 +282,7 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "Otherwise, return the default text encoding (i.e. 
\"locale\").\n" "\n" "This function emits EncodingWarning if *encoding* is None and\n" -"sys.flags.encoding_warning is true.\n" +"sys.flags.warn_default_encoding is true.\n" "\n" "This function can be used in APIs having encoding=None option.\n" "But please consider encoding=\"utf-8\" for new APIs."); @@ -362,4 +362,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=f8081ebe49134854 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c8b593b834aeff7b input=a9049054013a1b77]*/ diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index f071bd59fb184e..24880598e83d4d 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1125,7 +1125,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, if (encoding == NULL) { PyInterpreterState *interp = _PyInterpreterState_GET(); - if (_PyInterpreterState_GetConfig(interp)->encoding_warning) { + if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, "'encoding' option is omitted", 1); } diff --git a/Python/initconfig.c b/Python/initconfig.c index 35c61edd844266..cc6a2a9f86d06b 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -94,7 +94,7 @@ static const char usage_3[] = "\ otherwise activate automatically)\n\ -X pycache_prefix=PATH: enable writing .pyc files to a parallel tree rooted at the\n\ given directory instead of to the code tree\n\ - -X warn_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\ + -X warn_default_encoding: enable opt-in EncodingWarning for 'encoding=None'\n\ \n\ --check-hash-based-pycs always|default|never:\n\ control how Python invalidates hash-based .pyc files\n\ @@ -131,7 +131,7 @@ static const char usage_6[] = " debugger. 
It can be set to the callable of your debugger of choice.\n" "PYTHONDEVMODE: enable the development mode.\n" "PYTHONPYCACHEPREFIX: root directory for bytecode cache (pyc) files.\n" -"PYTHONWARNENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n"; +"PYTHONWARNDEFAULTENCODING: enable opt-in EncodingWarning for 'encoding=None'.\n"; #if defined(MS_WINDOWS) # define PYTHONHOMEHELP "\\python{major}{minor}" @@ -602,7 +602,7 @@ config_check_consistency(const PyConfig *config) assert(config->malloc_stats >= 0); assert(config->site_import >= 0); assert(config->bytes_warning >= 0); - assert(config->encoding_warning >= 0); + assert(config->warn_default_encoding >= 0); assert(config->inspect >= 0); assert(config->interactive >= 0); assert(config->optimization_level >= 0); @@ -701,7 +701,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->parse_argv = 0; config->site_import = -1; config->bytes_warning = -1; - config->encoding_warning = -1; + config->warn_default_encoding = -1; config->inspect = -1; config->interactive = -1; config->optimization_level = -1; @@ -732,7 +732,7 @@ config_init_defaults(PyConfig *config) config->use_environment = 1; config->site_import = 1; config->bytes_warning = 0; - config->encoding_warning = 0; + config->warn_default_encoding = 0; config->inspect = 0; config->interactive = 0; config->optimization_level = 0; @@ -911,7 +911,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(site_import); COPY_ATTR(bytes_warning); - COPY_ATTR(encoding_warning); + COPY_ATTR(warn_default_encoding); COPY_ATTR(inspect); COPY_ATTR(interactive); COPY_ATTR(optimization_level); @@ -1013,7 +1013,7 @@ _PyConfig_AsDict(const PyConfig *config) SET_ITEM_WSTR(platlibdir); SET_ITEM_INT(site_import); SET_ITEM_INT(bytes_warning); - SET_ITEM_INT(encoding_warning); + SET_ITEM_INT(warn_default_encoding); SET_ITEM_INT(inspect); SET_ITEM_INT(interactive); SET_ITEM_INT(optimization_level); @@ -1278,7 +1278,7 @@ _PyConfig_FromDict(PyConfig *config, 
PyObject *dict) GET_WSTRLIST(warnoptions); GET_UINT(site_import); GET_UINT(bytes_warning); - GET_UINT(encoding_warning); + GET_UINT(warn_default_encoding); GET_UINT(inspect); GET_UINT(interactive); GET_UINT(optimization_level); @@ -2144,8 +2144,8 @@ config_read(PyConfig *config, int compute_path_config) config->parse_argv = 2; } - if (config->encoding_warning < 0) { - config->encoding_warning = 0; + if (config->warn_default_encoding < 0) { + config->warn_default_encoding = 0; } return _PyStatus_OK(); diff --git a/Python/preconfig.c b/Python/preconfig.c index c733753e73244c..561f5354b9bf34 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -169,7 +169,7 @@ _PyPreCmdline_SetConfig(const _PyPreCmdline *cmdline, PyConfig *config) COPY_ATTR(isolated); COPY_ATTR(use_environment); COPY_ATTR(dev_mode); - COPY_ATTR(encoding_warning); + COPY_ATTR(warn_default_encoding); return _PyStatus_OK(); #undef COPY_ATTR @@ -258,21 +258,21 @@ _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig) cmdline->dev_mode = 0; } - // encoding_warning - if ((cmdline->encoding_warning < 0) - && (_Py_get_xoption(&cmdline->xoptions, L"warn_encoding") - || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNENCODING"))) + // warn_default_encoding + if ((cmdline->warn_default_encoding < 0) + && (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding") + || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))) { - cmdline->encoding_warning = 1; + cmdline->warn_default_encoding = 1; } - if (cmdline->encoding_warning < 0) { - cmdline->encoding_warning = 0; + if (cmdline->warn_default_encoding < 0) { + cmdline->warn_default_encoding = 0; } assert(cmdline->use_environment >= 0); assert(cmdline->isolated >= 0); assert(cmdline->dev_mode >= 0); - assert(cmdline->encoding_warning >= 0); + assert(cmdline->warn_default_encoding >= 0); return _PyStatus_OK(); } diff --git a/Python/sysmodule.c b/Python/sysmodule.c index b8c141868c05f9..dfb0feb60a1a31 100644 --- 
a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2506,7 +2506,7 @@ static PyStructSequence_Field flags_fields[] = { {"isolated", "-I"}, {"dev_mode", "-X dev"}, {"utf8_mode", "-X utf8"}, - {"encoding_warning", "-X warn_encoding"}, + {"warn_default_encoding", "-X warn_default_encoding"}, {0} }; @@ -2554,7 +2554,7 @@ set_flags_from_config(PyObject *flags, PyThreadState *tstate) SetFlag(config->isolated); SetFlagObj(PyBool_FromLong(config->dev_mode)); SetFlag(preconfig->utf8_mode); - SetFlag(config->encoding_warning); + SetFlag(config->warn_default_encoding); #undef SetFlagObj #undef SetFlag return 0; From 674feff646ebca1265d8bf56e2044ae921f16a5e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Mar 2021 17:17:26 +0900 Subject: [PATCH 18/50] Update docs --- Doc/using/cmdline.rst | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 04e0f3267dbe78..a2652ef2bdf985 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -453,6 +453,9 @@ Miscellaneous options * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel tree rooted at the given directory instead of to the code tree. See also :envvar:`PYTHONPYCACHEPREFIX`. + * ``-X warn_default_encoding`` issues a :class:`EncodingWarning` when + an ``encoding`` option is omitted and the default encoding is locale-specific. + See also :envvar:`PYTHONWARNDEFAULTENCODING`. It also allows passing arbitrary values and retrieving them through the :data:`sys._xoptions` dictionary. @@ -482,6 +485,9 @@ Miscellaneous options The ``-X showalloccount`` option has been removed. + .. versionadded:: 3.10 + The ``-X warn_default_encoding`` option. + .. deprecated-removed:: 3.9 3.10 The ``-X oldparser`` option. @@ -907,6 +913,27 @@ conflict. .. versionadded:: 3.7 +.. 
envvar:: PYTHONWARNDEFAULTENCODING + + If this environment variable is set to a non-empty string, issue a + :class:`EncodingWarning` when an ``encoding`` option is omitted and + the default encoding is locale-specific. + + This option can be used to find bugs caused by not passing + ``encoding="utf8"`` option. For example:: + + # This code may cause UnicodeDecodeError on Windows. + # encoding="utf8" or "b" mode must be used. + with open(path) as f: + data = json.load(f) + + ``encoding="locale"`` option can be used to specify locale-specific + encoding explicitly since Python 3.10. + + See :pep:`597` for detail. + + .. versionadded:: 3.10 + Debug-mode variables ~~~~~~~~~~~~~~~~~~~~ From d9d850f88091b204f64092b691eee00507d882c4 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 16 Mar 2021 17:20:35 +0900 Subject: [PATCH 19/50] Add NEWS entry --- .../next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst diff --git a/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst b/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst new file mode 100644 index 00000000000000..b157659b51a168 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst @@ -0,0 +1,3 @@ +Implement :pep:`597`: Add ``EncodingWarning`` warning, ``-X +warn_default_encoding``, :envvar:`PYTHONWARNDEFAULTENCODING`, and +``encoding="locale"`` option. From 16463ea2375c118d8fb97fcd0c94f4c8f84f1993 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 17 Mar 2021 17:46:54 +0900 Subject: [PATCH 20/50] Add document for text_encoding and encoding="locale". 
--- Doc/library/io.rst | 34 ++++++++++++++++++++++++++++++++++ Doc/using/cmdline.rst | 3 ++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index aecbec56866d73..b0f0e71a298e23 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -143,6 +143,32 @@ High-level Module Interface .. versionadded:: 3.8 +.. function:: text_encoding(encoding, stacklevel=1) + + This is a helper function for functions that use :func:`open` or + :class:`TextIOWrapper` and take ``encoding=None`` option. + + This function returns *encoding* if it is not ``None`` and "locale" if + *encoding* is ``None``. + + This function emits an :class:`EncodingWarning` if + ``sys.flags.warn_default_encoding`` is true. *stacklevel* specifies where + the warning is emit for. For example:: + + def read_text(path, encoding=None): + encoding = io.text_encoding(encoding) # stacklevel=1 + with open(path, encoding) as f: + return f.read() + + In this example, an :class:`EncodingWarning` is emit for the caller of the + ``read_text()``. If *stacklevel* is greater than 1, more stack frames are + skipped. + + See :envvar:`PYTHONWARNDEFAULTENCODING` and :pep:`597` for more information. + + .. versionadded:: 3.10 + + .. exception:: BlockingIOError This is a compatibility alias for the builtin :exc:`BlockingIOError` @@ -880,6 +906,11 @@ Text I/O encoded with. It defaults to :func:`locale.getpreferredencoding(False) `. + If ``sys.flags.warn_default_encoding`` is true and the default encoding + is used, this function emits an :class:`EncodingWarning`. You can suppress + the warning by using ``encoding="locale"`` option. + See :envvar:`PYTHONWARNDEFAULTENCODING` and :pep:`597` for more information. + *errors* is an optional string that specifies how encoding and decoding errors are to be handled. 
Pass ``'strict'`` to raise a :exc:`ValueError` exception if there is an encoding error (the default of ``None`` has the same @@ -930,6 +961,9 @@ Text I/O locale encoding using :func:`locale.setlocale`, use the current locale encoding instead of the user preferred encoding. + .. versionchanged:: 3.10 + *encoding* option now supports ``"locale"`` dummy encoding name. + :class:`TextIOWrapper` provides these data attributes and methods in addition to those from :class:`TextIOBase` and :class:`IOBase`: diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index a2652ef2bdf985..3df4f328febb25 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -928,7 +928,8 @@ conflict. data = json.load(f) ``encoding="locale"`` option can be used to specify locale-specific - encoding explicitly since Python 3.10. + encoding explicitly since Python 3.10. Python won't issue a + :class:`EncodingWarning` for it. See :pep:`597` for detail. From 412d633e54701cba72f95da4a5fff0b93d216a43 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 17 Mar 2021 18:01:29 +0900 Subject: [PATCH 21/50] Suppress EncodingWarning from site.py --- Lib/site.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Lib/site.py b/Lib/site.py index 5f1b31e73d90ad..939893eb5ee93b 100644 --- a/Lib/site.py +++ b/Lib/site.py @@ -170,7 +170,9 @@ def addpackage(sitedir, name, known_paths): fullname = os.path.join(sitedir, name) _trace(f"Processing .pth file: {fullname!r}") try: - f = io.TextIOWrapper(io.open_code(fullname)) + # locale encoding is not ideal especially on Windows. But we have used + # it for a long time. setuptools uses the locale encoding too. 
+ f = io.TextIOWrapper(io.open_code(fullname), encoding="locale") except OSError: return with f: From ee883d1cf34bf8170c894cf7c22b98584fb6fe70 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 18 Mar 2021 15:14:46 +0900 Subject: [PATCH 22/50] Remove io.LOCALE_ENCODING --- Lib/io.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/Lib/io.py b/Lib/io.py index 7a2076e3f1b68a..01f1df80ded297 100644 --- a/Lib/io.py +++ b/Lib/io.py @@ -66,9 +66,6 @@ SEEK_CUR = 1 SEEK_END = 2 -# PEP 597 -LOCALE_ENCODING = "locale" - # Declaring ABCs in C is tricky so we do it here. # Method descriptions and default implementations are inherited from the C # version however. From 6a15e2a67ef38bd7e1fdecaee050143948a8c11e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Thu, 18 Mar 2021 17:16:51 +0900 Subject: [PATCH 23/50] text_encoding() first argument is mandatory. --- Modules/_io/_iomodule.c | 4 ++-- Modules/_io/clinic/_iomodule.c.h | 11 ++++------- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index e7d5cac2be6c14..1e97c90e76b92b 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -509,7 +509,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, /*[clinic input] _io.text_encoding - encoding: object = NULL + encoding: object stacklevel: int = 1 / @@ -527,7 +527,7 @@ But please consider encoding="utf-8" for new APIs. 
static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=31cd163789637f39]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=1c86ec2f6e77f4fb]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index ac72e8f94d9cd9..67733c64ea476d 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -273,7 +273,7 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw } PyDoc_STRVAR(_io_text_encoding__doc__, -"text_encoding($module, encoding=, stacklevel=1, /)\n" +"text_encoding($module, encoding, stacklevel=1, /)\n" "--\n" "\n" "Helper function to choose the text encoding.\n" @@ -297,15 +297,12 @@ static PyObject * _io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; - PyObject *encoding = NULL; + PyObject *encoding; int stacklevel = 1; - if (!_PyArg_CheckPositional("text_encoding", nargs, 0, 2)) { + if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) { goto exit; } - if (nargs < 1) { - goto skip_optional; - } encoding = args[0]; if (nargs < 2) { goto skip_optional; @@ -362,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=c8b593b834aeff7b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=556347a99a2e2a66 input=a9049054013a1b77]*/ From 5d474b438e4b438b01cc1a886b0b1286959e96db Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 06:43:18 +0900 Subject: [PATCH 24/50] Apply suggestions from code review Co-authored-by: Victor Stinner --- Doc/c-api/init_config.rst | 6 ++++-- Doc/library/io.rst | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git 
a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 5d13e072a87226..c19c15cfd72b5f 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -585,8 +585,10 @@ PyConfig .. c:member:: int warn_default_encoding - If equals to 1, emit a ``EncodingWarning`` when ``TextIOWrapper`` - used its default encoding. See :pep:`597` for detail. + If non-zero, emit a :exc:`EncodingWarning` warning when :class:`io.TextIOWrapper` + uses its default encoding. See :pep:`597` for detail. + + Default: ``0``. .. versionadded:: 3.10 diff --git a/Doc/library/io.rst b/Doc/library/io.rst index b0f0e71a298e23..e8f283e8bb9533 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -152,7 +152,7 @@ High-level Module Interface *encoding* is ``None``. This function emits an :class:`EncodingWarning` if - ``sys.flags.warn_default_encoding`` is true. *stacklevel* specifies where + :data:`sys.flags.warn_default_encoding ` is true. *stacklevel* specifies where the warning is emit for. For example:: def read_text(path, encoding=None): From c17016f8253f8c8ed9a7a4125d11c031a97dd307 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 06:57:57 +0900 Subject: [PATCH 25/50] Simplify _PyPreCmdline and PyConfig --- Include/internal/pycore_initconfig.h | 3 +-- Python/initconfig.c | 7 +------ Python/preconfig.c | 8 ++------ 3 files changed, 4 insertions(+), 14 deletions(-) diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h index 9bc0df5b019fa0..4b009e816b4927 100644 --- a/Include/internal/pycore_initconfig.h +++ b/Include/internal/pycore_initconfig.h @@ -109,8 +109,7 @@ typedef struct { (_PyPreCmdline){ \ .use_environment = -1, \ .isolated = -1, \ - .dev_mode = -1, \ - .warn_default_encoding = -1} + .dev_mode = -1} /* Note: _PyPreCmdline_INIT sets other fields to 0/NULL */ extern void _PyPreCmdline_Clear(_PyPreCmdline *cmdline); diff --git a/Python/initconfig.c b/Python/initconfig.c index e19fd02a3959a1..27ae48dd3c97c8 100644 --- 
a/Python/initconfig.c +++ b/Python/initconfig.c @@ -701,7 +701,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->parse_argv = 0; config->site_import = -1; config->bytes_warning = -1; - config->warn_default_encoding = -1; + config->warn_default_encoding = 0; config->inspect = -1; config->interactive = -1; config->optimization_level = -1; @@ -732,7 +732,6 @@ config_init_defaults(PyConfig *config) config->use_environment = 1; config->site_import = 1; config->bytes_warning = 0; - config->warn_default_encoding = 0; config->inspect = 0; config->interactive = 0; config->optimization_level = 0; @@ -2144,10 +2143,6 @@ config_read(PyConfig *config, int compute_path_config) config->parse_argv = 2; } - if (config->warn_default_encoding < 0) { - config->warn_default_encoding = 0; - } - return _PyStatus_OK(); } diff --git a/Python/preconfig.c b/Python/preconfig.c index 561f5354b9bf34..ae1cc3f90fca7f 100644 --- a/Python/preconfig.c +++ b/Python/preconfig.c @@ -259,15 +259,11 @@ _PyPreCmdline_Read(_PyPreCmdline *cmdline, const PyPreConfig *preconfig) } // warn_default_encoding - if ((cmdline->warn_default_encoding < 0) - && (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding") - || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING"))) + if (_Py_get_xoption(&cmdline->xoptions, L"warn_default_encoding") + || _Py_GetEnv(cmdline->use_environment, "PYTHONWARNDEFAULTENCODING")) { cmdline->warn_default_encoding = 1; } - if (cmdline->warn_default_encoding < 0) { - cmdline->warn_default_encoding = 0; - } assert(cmdline->use_environment >= 0); assert(cmdline->isolated >= 0); From 03f971c8172f6a271ef666ef1941237c90b62b3f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 07:24:18 +0900 Subject: [PATCH 26/50] Update EncodingWarning doc --- Doc/library/exceptions.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 366c5711ea0500..ea20d5c0476a6e 100644 --- 
a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -690,8 +690,11 @@ The following exceptions are used as warning categories; see the .. exception:: EncodingWarning - Base class for warnings about encodings when those warnings are intended for - other Python developers. + Base class for warnings about encodings. + + :class:`io.TextIOWrapper`, :func:`open`, and other functions having + ``encoding=None`` option and locale specific default encoding may emit this + warning. See :pep:`597` for more details. .. versionadded:: 3.10 From 9d26b7a047493ddd0b02f297ed6683adcac62a55 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 09:40:21 +0900 Subject: [PATCH 27/50] Update document --- Doc/c-api/init_config.rst | 4 +-- Doc/library/exceptions.rst | 4 +-- Doc/library/io.rst | 59 +++++++++++++++++++++++++++++++++----- Doc/using/cmdline.rst | 21 +++----------- 4 files changed, 59 insertions(+), 29 deletions(-) diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index c19c15cfd72b5f..29fbb68195b347 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -586,8 +586,8 @@ PyConfig .. c:member:: int warn_default_encoding If non-zero, emit a :exc:`EncodingWarning` warning when :class:`io.TextIOWrapper` - uses its default encoding. See :pep:`597` for detail. - + uses its default encoding. See :ref:`io-encoding-warning` for details. + Default: ``0``. .. versionadded:: 3.10 diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index ea20d5c0476a6e..05684ca4fdac9b 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -692,9 +692,7 @@ The following exceptions are used as warning categories; see the Base class for warnings about encodings. - :class:`io.TextIOWrapper`, :func:`open`, and other functions having - ``encoding=None`` option and locale specific default encoding may emit this - warning. See :pep:`597` for more details. + See :ref:`io-encoding-warning` for details. .. 
versionadded:: 3.10 diff --git a/Doc/library/io.rst b/Doc/library/io.rst index e8f283e8bb9533..a04d2a8795dbc1 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -106,6 +106,54 @@ stream by opening a file in binary mode with buffering disabled:: The raw stream API is described in detail in the docs of :class:`RawIOBase`. +.. _io-text-encoding: + +Text Encoding +------------- + +The default encoding of :class:`TextIOWrapper` and :func:`open` is +locale-specific. (:func:`locale.getpreferredencoding(False) `) + +But many developers forget to specify encoding when opening text files +encoded in UTF-8 (e.g. JSON, TOML, Markdown, etc...) since most Unix +platforms uses UTF-8 locale by default. It cause a bug because locale-specific +encoding is not UTF-8 for most Windows users. For example:: + + # may not work on Windows when non-ASCII characters in the file. + with open("README.md") as f: + long_description = f.read() + +Additionally, Python may change the default text encoding to UTF-8 in the +future, although there is no plan yet. + +So it is highly recommended to specify encoding explicitly when opening text +files. If you want to use UTF-8, specify ``encoding="utf-8"``. If you need to +use locale-specific encoding, ``encoding="locale"`` is supported since Python +3.10. + +When you need to run code using the default encoding to open UTF-8 files on +Windows, you can enable the UTF-8 mode. See also the +:ref:`UTF-8 mode on Windows ` + +.. _io-encoding-warning: + +Opt-in EncodingWarning +^^^^^^^^^^^^^^^^^^^^^^ + +.. versionadded:: 3.10 + See :pep:`597` for more details. + +To find where the default encoding is used, you can use +a ``-X warn_default_encoding`` command line argument or a +:envvar:`PYTHONWARNDEFAULTENCODING` environment variable to emit +an :exc:`EncodingWarning` when the defaut encoding is used. 
+ +If you are providing APIs using :func:`open` or :class:`TextIOWrapper` and +having ``encoding=None`` parameter, you can use :func:`text_encoding` to emit +an :exc:`EncodingWarning` to the user too. But please consider using UTF-8 +by default (i.e. ``encoding="utf-8"``). + + High-level Module Interface --------------------------- @@ -146,7 +194,7 @@ High-level Module Interface .. function:: text_encoding(encoding, stacklevel=1) This is a helper function for functions that use :func:`open` or - :class:`TextIOWrapper` and take ``encoding=None`` option. + :class:`TextIOWrapper` and take ``encoding=None`` argument. This function returns *encoding* if it is not ``None`` and "locale" if *encoding* is ``None``. @@ -164,7 +212,7 @@ High-level Module Interface ``read_text()``. If *stacklevel* is greater than 1, more stack frames are skipped. - See :envvar:`PYTHONWARNDEFAULTENCODING` and :pep:`597` for more information. + See :ref:`io-default-encoding` for more information. .. versionadded:: 3.10 @@ -905,11 +953,8 @@ Text I/O *encoding* gives the name of the encoding that the stream will be decoded or encoded with. It defaults to :func:`locale.getpreferredencoding(False) `. - - If ``sys.flags.warn_default_encoding`` is true and the default encoding - is used, this function emits an :class:`EncodingWarning`. You can suppress - the warning by using ``encoding="locale"`` option. - See :envvar:`PYTHONWARNDEFAULTENCODING` and :pep:`597` for more information. + ``encoding="locale"`` can be used to specify the locale specific encoding + explicitly. See :ref:`io-text-encoding` for more information. *errors* is an optional string that specifies how encoding and decoding errors are to be handled. 
Pass ``'strict'`` to raise a :exc:`ValueError` diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 3df4f328febb25..93367017175191 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -453,8 +453,8 @@ Miscellaneous options * ``-X pycache_prefix=PATH`` enables writing ``.pyc`` files to a parallel tree rooted at the given directory instead of to the code tree. See also :envvar:`PYTHONPYCACHEPREFIX`. - * ``-X warn_default_encoding`` issues a :class:`EncodingWarning` when - an ``encoding`` option is omitted and the default encoding is locale-specific. + * ``-X warn_default_encoding`` issues a :class:`EncodingWarning` when the + locale-specific default encoding is used. See also :envvar:`PYTHONWARNDEFAULTENCODING`. It also allows passing arbitrary values and retrieving them through the @@ -916,22 +916,9 @@ conflict. .. envvar:: PYTHONWARNDEFAULTENCODING If this environment variable is set to a non-empty string, issue a - :class:`EncodingWarning` when an ``encoding`` option is omitted and - the default encoding is locale-specific. + :class:`EncodingWarning` when the locale-specific default encoding is used. - This option can be used to find bugs caused by not passing - ``encoding="utf8"`` option. For example:: - - # This code may cause UnicodeDecodeError on Windows. - # encoding="utf8" or "b" mode must be used. - with open(path) as f: - data = json.load(f) - - ``encoding="locale"`` option can be used to specify locale-specific - encoding explicitly since Python 3.10. Python won't issue a - :class:`EncodingWarning` for it. - - See :pep:`597` for detail. + See :ref:`io-encoding-warning` for details. .. 
versionadded:: 3.10 From 60e74cf75f127587f87e027cdcdf9ce377d2bb81 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 09:40:29 +0900 Subject: [PATCH 28/50] tweak warning message --- Lib/_pyio.py | 2 +- Modules/_io/_iomodule.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 33ae72634f9717..64fe7eeadc29d9 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -56,7 +56,7 @@ def text_encoding(encoding, stacklevel=1): if encoding is None: if sys.flags.warn_default_encoding: import warnings - warnings.warn("'encoding' option is not specified.", + warnings.warn("'encoding' argument is not specified.", EncodingWarning, stacklevel + 2) encoding = "locale" return encoding diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 1e97c90e76b92b..bda459613be591 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -533,7 +533,7 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, - "'encoding' option is omitted", stacklevel + 1); + "'encoding' argument is not specified", stacklevel + 1); } Py_INCREF(_PyIO_str_locale); return _PyIO_str_locale; From a505b5f9f3f6038882edd6eec0d38f68f24c4f77 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 09:44:23 +0900 Subject: [PATCH 29/50] Use stacklevel=2 for text_encoding() default --- Doc/library/io.rst | 11 +++++------ Lib/_pyio.py | 4 ++-- Modules/_io/_iomodule.c | 4 ++-- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index a04d2a8795dbc1..9f997f4c0d5ab9 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -191,7 +191,7 @@ High-level Module Interface .. versionadded:: 3.8 -.. function:: text_encoding(encoding, stacklevel=1) +.. 
function:: text_encoding(encoding, stacklevel=2) This is a helper function for functions that use :func:`open` or :class:`TextIOWrapper` and take ``encoding=None`` argument. @@ -200,17 +200,16 @@ High-level Module Interface *encoding* is ``None``. This function emits an :class:`EncodingWarning` if - :data:`sys.flags.warn_default_encoding ` is true. *stacklevel* specifies where - the warning is emit for. For example:: + :data:`sys.flags.warn_default_encoding ` is true. + *stacklevel* specifies where the warning is emit for. For example:: def read_text(path, encoding=None): - encoding = io.text_encoding(encoding) # stacklevel=1 + encoding = io.text_encoding(encoding) # stacklevel=2 with open(path, encoding) as f: return f.read() In this example, an :class:`EncodingWarning` is emit for the caller of the - ``read_text()``. If *stacklevel* is greater than 1, more stack frames are - skipped. + ``read_text()``. See :ref:`io-default-encoding` for more information. diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 64fe7eeadc29d9..f960aed1b96595 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -40,7 +40,7 @@ _CHECK_ERRORS = _IOBASE_EMITS_UNRAISABLE -def text_encoding(encoding, stacklevel=1): +def text_encoding(encoding, stacklevel=2): """ Helper function to choose the text encoding. @@ -57,7 +57,7 @@ def text_encoding(encoding, stacklevel=1): if sys.flags.warn_default_encoding: import warnings warnings.warn("'encoding' argument is not specified.", - EncodingWarning, stacklevel + 2) + EncodingWarning, stacklevel + 1) encoding = "locale" return encoding diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index bda459613be591..cf24a656686cb7 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -510,7 +510,7 @@ _io_open_impl(PyObject *module, PyObject *file, const char *mode, /*[clinic input] _io.text_encoding encoding: object - stacklevel: int = 1 + stacklevel: int = 2 / Helper function to choose the text encoding. 
@@ -533,7 +533,7 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, - "'encoding' argument is not specified", stacklevel + 1); + "'encoding' argument is not specified", stacklevel); } Py_INCREF(_PyIO_str_locale); return _PyIO_str_locale; From cbe22e23a8cd689bef59b758c5554603c4c9f607 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 09:45:41 +0900 Subject: [PATCH 30/50] fixup --- Doc/library/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 9f997f4c0d5ab9..54406cce0c8786 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -112,7 +112,7 @@ Text Encoding ------------- The default encoding of :class:`TextIOWrapper` and :func:`open` is -locale-specific. (:func:`locale.getpreferredencoding(False) `) +locale-specific (:func:`locale.getpreferredencoding(False) `). But many developers forget to specify encoding when opening text files encoded in UTF-8 (e.g. JSON, TOML, Markdown, etc...) since most Unix From a9f9f041c30fc9f229eb4cf574fb8a080c013653 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 10:02:42 +0900 Subject: [PATCH 31/50] tweak for readability --- Lib/_pyio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index f960aed1b96595..5385d3b1fa437e 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -54,11 +54,11 @@ def text_encoding(encoding, stacklevel=2): But please consider encoding="utf-8" for new APIs. 
""" if encoding is None: + encoding = "locale" if sys.flags.warn_default_encoding: import warnings warnings.warn("'encoding' argument is not specified.", EncodingWarning, stacklevel + 1) - encoding = "locale" return encoding From 3bea88f8ed07f689670dfb9abdee6f840912206b Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 10:08:22 +0900 Subject: [PATCH 32/50] make clinic --- Modules/_io/_iomodule.c | 2 +- Modules/_io/clinic/_iomodule.c.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index cf24a656686cb7..d748f1a9dc3e55 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -527,7 +527,7 @@ But please consider encoding="utf-8" for new APIs. static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=1c86ec2f6e77f4fb]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=2f8639413d4a6e1a]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 67733c64ea476d..557b7f3fc3b5c1 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -273,7 +273,7 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw } PyDoc_STRVAR(_io_text_encoding__doc__, -"text_encoding($module, encoding, stacklevel=1, /)\n" +"text_encoding($module, encoding, stacklevel=2, /)\n" "--\n" "\n" "Helper function to choose the text encoding.\n" @@ -298,7 +298,7 @@ _io_text_encoding(PyObject *module, PyObject *const *args, Py_ssize_t nargs) { PyObject *return_value = NULL; PyObject *encoding; - int stacklevel = 1; + int stacklevel = 2; if (!_PyArg_CheckPositional("text_encoding", nargs, 1, 2)) { goto exit; @@ -359,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec 
exit: return return_value; } -/*[clinic end generated code: output=556347a99a2e2a66 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0debb6d44de1be64 input=a9049054013a1b77]*/ From d260a4cf2155403c83327c876497e2d710ed02cc Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 10:11:22 +0900 Subject: [PATCH 33/50] fix doc build error --- Doc/library/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 54406cce0c8786..51c955be086c9c 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -211,7 +211,7 @@ High-level Module Interface In this example, an :class:`EncodingWarning` is emit for the caller of the ``read_text()``. - See :ref:`io-default-encoding` for more information. + See :ref:`io-text-encoding` for more information. .. versionadded:: 3.10 From 049a2690a8a00d766024445adb90d27fae110377 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 10:24:11 +0900 Subject: [PATCH 34/50] tweak warning message --- Modules/_io/textio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 86725dc5c97f41..36d65f5813950c 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1127,7 +1127,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, - "'encoding' option is omitted", 1); + "'encoding' argument is not specified", 1); } } else if (strcmp(encoding, "locale") == 0) { From 018ba64e7d40c84e6ac83919a9890bc6a10fe321 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Fri, 19 Mar 2021 20:52:38 +0900 Subject: [PATCH 35/50] fixup --- Doc/library/io.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 51c955be086c9c..516633db02720a 100644 --- a/Doc/library/io.rst +++ 
b/Doc/library/io.rst @@ -200,8 +200,9 @@ High-level Module Interface *encoding* is ``None``. This function emits an :class:`EncodingWarning` if - :data:`sys.flags.warn_default_encoding ` is true. - *stacklevel* specifies where the warning is emit for. For example:: + :data:`sys.flags.warn_default_encoding ` is true and *encoding* + is None. *stacklevel* specifies where the warning is emit for. For + example:: def read_text(path, encoding=None): encoding = io.text_encoding(encoding) # stacklevel=2 From 3a9623ee62569aae558393c1abd442bcca9bfe9e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 12:08:38 +0900 Subject: [PATCH 36/50] Fix subprocess --- Lib/subprocess.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Lib/subprocess.py b/Lib/subprocess.py index d375514b2dd0a6..783885dae98cf0 100644 --- a/Lib/subprocess.py +++ b/Lib/subprocess.py @@ -701,7 +701,7 @@ def _use_posix_spawn(): _USE_POSIX_SPAWN = _use_posix_spawn() -class Popen(object): +class Popen: """ Execute a child program in a new process. For a complete description of the arguments see the Python documentation. @@ -852,6 +852,13 @@ def __init__(self, args, bufsize=-1, executable=None, self.text_mode = encoding or errors or text or universal_newlines + # PEP 597: We suppress the EncodingWarning in subprocess module + # for now (at Python 3.10), because we focus on files for now. + # This will be changed to encoding = io.text_encoding(encoding) + # in the future. + if self.text_mode and encoding is None: + self.encoding = encoding = "locale" + # How long to resume waiting on a child after the first ^C. # There is no right value for this. The purpose is to be polite # yet remain good for interactive users trying to exit a tool. 
From 737059e90729ef91f04fc4c60ab4cc6fd1c0f9eb Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 13:06:36 +0900 Subject: [PATCH 37/50] Update Doc/library/io.rst Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 516633db02720a..bb8d68da1c189e 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -193,23 +193,23 @@ High-level Module Interface .. function:: text_encoding(encoding, stacklevel=2) - This is a helper function for functions that use :func:`open` or - :class:`TextIOWrapper` and take ``encoding=None`` argument. + This is a helper function for callables that use :func:`open` or + :class:`TextIOWrapper` and have an ``encoding=None`` parameter. - This function returns *encoding* if it is not ``None`` and "locale" if + This function returns *encoding* if it is not ``None`` and ``"locale"`` if *encoding* is ``None``. This function emits an :class:`EncodingWarning` if :data:`sys.flags.warn_default_encoding <sys.flags>` is true and *encoding* - is None. *stacklevel* specifies where the warning is emit for. For - example:: + is None. *stacklevel* specifies where the warning is emitted. + For example:: def read_text(path, encoding=None): encoding = io.text_encoding(encoding) # stacklevel=2 with open(path, encoding) as f: return f.read() - In this example, an :class:`EncodingWarning` is emit for the caller of the + In this example, an :class:`EncodingWarning` is emitted for the caller of ``read_text()``. See :ref:`io-text-encoding` for more information. 
From 6a622116f1c3f2bb089562418efcac24aef82630 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 13:06:52 +0900 Subject: [PATCH 38/50] Update Doc/library/io.rst Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index bb8d68da1c189e..4245942d6d086e 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -123,8 +123,8 @@ encoding is not UTF-8 for most Windows users. For example:: with open("README.md") as f: long_description = f.read() -Additionally, Python may change the default text encoding to UTF-8 in the -future, although there is no plan yet. +Additionally, while there is no concrete plan as of yet, Python may change +the default text file encoding to UTF-8 in the future. So it is highly recommended to specify encoding explicitly when opening text files. If you want to use UTF-8, specify ``encoding="utf-8"``. If you need to From 54c7dc65ae50396d3991a97aed5e67564c7b72ea Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 14:48:34 +0900 Subject: [PATCH 39/50] Update Doc/library/io.rst Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 4245942d6d086e..b9d0adbdf55625 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -131,9 +131,9 @@ files. If you want to use UTF-8, specify ``encoding="utf-8"``. If you need to use locale-specific encoding, ``encoding="locale"`` is supported since Python 3.10. -When you need to run code using the default encoding to open UTF-8 files on -Windows, you can enable the UTF-8 mode. See also the -:ref:`UTF-8 mode on Windows <win-utf8-mode>` +When you need to run existing code on Windows that attempts to opens +UTF-8 files using the default locale encoding, you can enable the UTF-8 +mode. See :ref:`UTF-8 mode on Windows <win-utf8-mode>`. .. 
_io-encoding-warning: From 5b2830bc1577b7813dad270dfa21d4867aa0f1ff Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 14:49:44 +0900 Subject: [PATCH 40/50] Update Doc/library/io.rst Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index b9d0adbdf55625..c6817326846389 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -143,10 +143,10 @@ Opt-in EncodingWarning .. versionadded:: 3.10 See :pep:`597` for more details. -To find where the default encoding is used, you can use -a ``-X warn_default_encoding`` command line argument or a -:envvar:`PYTHONWARNDEFAULTENCODING` environment variable to emit -an :exc:`EncodingWarning` when the defaut encoding is used. +To find where the default locale encoding is used, you can enable +the ``-X warn_default_encoding`` command line option or set the +:envvar:`PYTHONWARNDEFAULTENCODING` environment variable, which will +emit an :exc:`EncodingWarning` when the default encoding is used. If you are providing APIs using :func:`open` or :class:`TextIOWrapper` and having ``encoding=None`` parameter, you can use :func:`text_encoding` to emit From 14f2a6eea58ab07f01c9b3c375e2f9a783801f6e Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 16:17:28 +0900 Subject: [PATCH 41/50] Apply suggestions from code review Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 16 ++++++++-------- Doc/using/cmdline.rst | 2 +- Lib/_pyio.py | 13 +++++++------ Modules/_io/_iomodule.c | 2 +- Modules/_io/textio.c | 2 +- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index c6817326846389..8de0d3374a00e8 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -114,22 +114,22 @@ Text Encoding The default encoding of :class:`TextIOWrapper` and :func:`open` is locale-specific (:func:`locale.getpreferredencoding(False) `). 
-But many developers forget to specify encoding when opening text files +However, many developers forget to specify the encoding when opening text files encoded in UTF-8 (e.g. JSON, TOML, Markdown, etc...) since most Unix -platforms uses UTF-8 locale by default. It cause a bug because locale-specific +platforms use UTF-8 locale by default. This causes bugs because the locale encoding is not UTF-8 for most Windows users. For example:: - # may not work on Windows when non-ASCII characters in the file. + # May not work on Windows when non-ASCII characters in the file. with open("README.md") as f: long_description = f.read() Additionally, while there is no concrete plan as of yet, Python may change the default text file encoding to UTF-8 in the future. -So it is highly recommended to specify encoding explicitly when opening text -files. If you want to use UTF-8, specify ``encoding="utf-8"``. If you need to -use locale-specific encoding, ``encoding="locale"`` is supported since Python -3.10. +Accordingly, it is highly recommended that you specify the encoding +explicitly when opening text files. If you want to use UTF-8, pass +``encoding="utf-8"``. To use the current locale encoding, +``encoding="locale"`` is supported in Python 3.10. When you need to run existing code on Windows that attempts to opens UTF-8 files using the default locale encoding, you can enable the UTF-8 @@ -953,7 +953,7 @@ Text I/O *encoding* gives the name of the encoding that the stream will be decoded or encoded with. It defaults to :func:`locale.getpreferredencoding(False) `. - ``encoding="locale"`` can be used to specify the locale specific encoding + ``encoding="locale"`` can be used to specify the current locale's encoding explicitly. See :ref:`io-text-encoding` for more information. 
*errors* is an optional string that specifies how encoding and decoding diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 93367017175191..1493c7c9017548 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -454,7 +454,7 @@ Miscellaneous options tree rooted at the given directory instead of to the code tree. See also :envvar:`PYTHONPYCACHEPREFIX`. * ``-X warn_default_encoding`` issues a :class:`EncodingWarning` when the - locale-specific default encoding is used. + locale-specific default encoding is used for opening files. See also :envvar:`PYTHONWARNDEFAULTENCODING`. It also allows passing arbitrary values and retrieving them through the diff --git a/Lib/_pyio.py b/Lib/_pyio.py index 5385d3b1fa437e..d5d16a60a10bb2 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -42,22 +42,23 @@ def text_encoding(encoding, stacklevel=2): """ - Helper function to choose the text encoding. + A helper function to choose the text encoding. When encoding is not None, just return it. Otherwise, return the default text encoding (i.e. "locale"). - This function emits EncodingWarning if *encoding* is None and - sys.flags.warn_default_encoding is true. + This function emits an EncodingWarning if *encoding* is None and + sys.flags.encoding_warning is True. - This function can be used in APIs having encoding=None option. - But please consider encoding="utf-8" for new APIs. + This can be used in APIs with an encoding=None parameter + that pass it to TextIOWrapper or open. + However, please consider using encoding="utf-8" for new APIs. 
""" if encoding is None: encoding = "locale" if sys.flags.warn_default_encoding: import warnings - warnings.warn("'encoding' argument is not specified.", + warnings.warn("'encoding' argument not specified.", EncodingWarning, stacklevel + 1) return encoding diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index d748f1a9dc3e55..14cf6126682e89 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -533,7 +533,7 @@ _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, - "'encoding' argument is not specified", stacklevel); + "'encoding' argument not specified", stacklevel); } Py_INCREF(_PyIO_str_locale); return _PyIO_str_locale; diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c index 36d65f5813950c..6f89a879c9c2bf 100644 --- a/Modules/_io/textio.c +++ b/Modules/_io/textio.c @@ -1127,7 +1127,7 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer, PyInterpreterState *interp = _PyInterpreterState_GET(); if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) { PyErr_WarnEx(PyExc_EncodingWarning, - "'encoding' argument is not specified", 1); + "'encoding' argument not specified", 1); } } else if (strcmp(encoding, "locale") == 0) { From 06e2a32e7d28f15640ce4816a446304df7198e71 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 16:05:54 +0900 Subject: [PATCH 42/50] Move EncodingWarnings --- Doc/library/exceptions.rst | 18 +++++++++--------- Doc/whatsnew/3.10.rst | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/Doc/library/exceptions.rst b/Doc/library/exceptions.rst index 05684ca4fdac9b..40ccde72d07cc3 100644 --- a/Doc/library/exceptions.rst +++ b/Doc/library/exceptions.rst @@ -688,15 +688,6 @@ The following exceptions are used as warning categories; see the Base class for warnings 
generated by user code. -.. exception:: EncodingWarning - - Base class for warnings about encodings. - - See :ref:`io-encoding-warning` for details. - - .. versionadded:: 3.10 - - .. exception:: DeprecationWarning Base class for warnings about deprecated features when those warnings are @@ -750,6 +741,15 @@ The following exceptions are used as warning categories; see the Base class for warnings related to Unicode. +.. exception:: EncodingWarning + + Base class for warnings related to encodings. + + See :ref:`io-encoding-warning` for details. + + .. versionadded:: 3.10 + + .. exception:: BytesWarning Base class for warnings related to :class:`bytes` and :class:`bytearray`. diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index c4c282e5a04eae..0daf50d5b123d1 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -444,6 +444,24 @@ For the full specification see :pep:`634`. Motivation and rationale are in :pep:`635`, and a longer tutorial is in :pep:`636`. +.. _whatsnew310-pep597 + +Optional ``EncodingWarning`` and ``encoding="locale"`` option +------------------------------------------------------------- + +The default encoding of :class:`TextIOWrapper` and :func:`open` is +platform and locale dependent. Since UTF-8 is used on most Unix +platforms, omitting ``encoding`` option when opening UTF-8 files +(e.g. JSON, YAML, TOML, Markdown) is very common bug. For example:: + + # BUG: "b" mode or encoding="utf-8" should be used. + with open("data.json") as f: + data = json.laod(f) + +To find this type of bugs, optional ``EncodingWarning`` is added. 
+ + + New Features Related to Type Annotations ======================================== From 27d49d269827ee73132a195210a21af4ca928081 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 16:06:32 +0900 Subject: [PATCH 43/50] fix comment --- Lib/test/test_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py index 22490be6a79486..c731302a9f22f6 100644 --- a/Lib/test/test_io.py +++ b/Lib/test/test_io.py @@ -4251,7 +4251,7 @@ def test_check_encoding_errors(self): def test_check_encoding_warning(self): # PEP 597: Raise warning when encoding is not specified - # and dev mode is enabled. + # and sys.flags.warn_default_encoding is set. mod = self.io.__name__ filename = __file__ code = textwrap.dedent(f'''\ From 80f4644247ff073a07fc7be1edc22f1e55afe77c Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 16:24:13 +0900 Subject: [PATCH 44/50] fix text_encoding() docstring --- Modules/_io/_iomodule.c | 10 +++++----- Modules/_io/clinic/_iomodule.c.h | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Modules/_io/_iomodule.c b/Modules/_io/_iomodule.c index 14cf6126682e89..652c2ce5b0d61f 100644 --- a/Modules/_io/_iomodule.c +++ b/Modules/_io/_iomodule.c @@ -513,21 +513,21 @@ _io.text_encoding stacklevel: int = 2 / -Helper function to choose the text encoding. +A helper function to choose the text encoding. When encoding is not None, just return it. Otherwise, return the default text encoding (i.e. "locale"). -This function emits EncodingWarning if *encoding* is None and +This function emits an EncodingWarning if encoding is None and sys.flags.warn_default_encoding is true. -This function can be used in APIs having encoding=None option. -But please consider encoding="utf-8" for new APIs. +This can be used in APIs with an encoding=None parameter. +However, please consider using encoding="utf-8" for new APIs. 
[clinic start generated code]*/ static PyObject * _io_text_encoding_impl(PyObject *module, PyObject *encoding, int stacklevel) -/*[clinic end generated code: output=91b2cfea6934cc0c input=2f8639413d4a6e1a]*/ +/*[clinic end generated code: output=91b2cfea6934cc0c input=bf70231213e2a7b4]*/ { if (encoding == NULL || encoding == Py_None) { PyInterpreterState *interp = _PyInterpreterState_GET(); diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 557b7f3fc3b5c1..91c55b1816cd82 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -276,16 +276,16 @@ PyDoc_STRVAR(_io_text_encoding__doc__, "text_encoding($module, encoding, stacklevel=2, /)\n" "--\n" "\n" -"Helper function to choose the text encoding.\n" +"A helper function to choose the text encoding.\n" "\n" "When encoding is not None, just return it.\n" "Otherwise, return the default text encoding (i.e. \"locale\").\n" "\n" -"This function emits EncodingWarning if *encoding* is None and\n" +"This function emits an EncodingWarning if encoding is None and\n" "sys.flags.warn_default_encoding is true.\n" "\n" -"This function can be used in APIs having encoding=None option.\n" -"But please consider encoding=\"utf-8\" for new APIs."); +"This can be used in APIs with an encoding=None parameter.\n" +"However, please consider using encoding=\"utf-8\" for new APIs."); #define _IO_TEXT_ENCODING_METHODDEF \ {"text_encoding", (PyCFunction)(void(*)(void))_io_text_encoding, METH_FASTCALL, _io_text_encoding__doc__}, @@ -359,4 +359,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=0debb6d44de1be64 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=06e055d1d80b835d input=a9049054013a1b77]*/ From 6ad0e7f740d805a1498ba7c2b6aff2c95303e2af Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 16:27:35 +0900 Subject: [PATCH 45/50] update what's new 
--- Doc/whatsnew/3.10.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 0daf50d5b123d1..dc3b67efb9d03b 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -459,7 +459,13 @@ platforms, omitting ``encoding`` option when opening UTF-8 files data = json.laod(f) To find this type of bugs, optional ``EncodingWarning`` is added. +It is emitted when :data:`sys.flags.warn_default_encoding <sys.flags>` +is true and locale-specific default encoding is used. +``-X warn_default_encoding`` option and :envvar:`PYTHONWARNDEFAULTENCODING` +are added to enable the warning. + +See :ref:`io-text-encoding` for more information. New Features Related to Type Annotations From 73b27f1c5edea9b4ec13fb43f8ffd87a271a136a Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Tue, 23 Mar 2021 16:41:54 +0900 Subject: [PATCH 46/50] fix doc build --- Doc/whatsnew/3.10.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index dc3b67efb9d03b..923720659fcaaa 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -444,7 +444,7 @@ For the full specification see :pep:`634`. Motivation and rationale are in :pep:`635`, and a longer tutorial is in :pep:`636`. -.. _whatsnew310-pep597 +.. 
_whatsnew310-pep597: Optional ``EncodingWarning`` and ``encoding="locale"`` option ------------------------------------------------------------- From c149d65fa26bd8cabbfa02d92ed679daf58c4f6f Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 24 Mar 2021 11:30:09 +0900 Subject: [PATCH 47/50] Update Doc/library/io.rst Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 8de0d3374a00e8..edef84940d0f08 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -148,10 +148,12 @@ the ``-X warn_default_encoding`` command line option or set the :envvar:`PYTHONWARNDEFAULTENCODING` environment variable, which will emit an :exc:`EncodingWarning` when the default encoding is used. -If you are providing APIs using :func:`open` or :class:`TextIOWrapper` and -having ``encoding=None`` parameter, you can use :func:`text_encoding` to emit -an :exc:`EncodingWarning` to the user too. But please consider using UTF-8 -by default (i.e. ``encoding="utf-8"``). +If you are providing an API that uses :func:`open` or +:class:`TextIOWrapper` and passes ``encoding=None`` as a parameter, you +can use :func:`text_encoding` so that callers of the API will emit an +:exc:`EncodingWarning` if they don't pass an ``encoding``. However, +please consider using UTF-8 by default (i.e. ``encoding="utf-8"``) for +new APIs. 
High-level Module Interface From 4eb7655c2831e0f1baae40ce5049df540916e9bf Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 24 Mar 2021 11:31:59 +0900 Subject: [PATCH 48/50] Apply suggestions from code review Co-authored-by: CAM Gerlach --- Lib/_pyio.py | 2 +- .../next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/_pyio.py b/Lib/_pyio.py index d5d16a60a10bb2..0f182d42402063 100644 --- a/Lib/_pyio.py +++ b/Lib/_pyio.py @@ -48,7 +48,7 @@ def text_encoding(encoding, stacklevel=2): Otherwise, return the default text encoding (i.e. "locale"). This function emits an EncodingWarning if *encoding* is None and - sys.flags.encoding_warning is True. + sys.flags.warn_default_encoding is true. This can be used in APIs with an encoding=None parameter that pass it to TextIOWrapper or open. diff --git a/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst b/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst index b157659b51a168..b79a49c881bcc6 100644 --- a/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst +++ b/Misc/NEWS.d/next/Library/2021-03-16-17-20-33.bpo-43510.-BeQH_.rst @@ -1,3 +1,3 @@ Implement :pep:`597`: Add ``EncodingWarning`` warning, ``-X -warn_default_encoding``, :envvar:`PYTHONWARNDEFAULTENCODING`, and -``encoding="locale"`` option. +warn_default_encoding`` option, :envvar:`PYTHONWARNDEFAULTENCODING` +environment variable and ``encoding="locale"`` argument value. 
From e3bce76bfa035b7654058c5e882d7f2801093835 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 24 Mar 2021 11:39:20 +0900 Subject: [PATCH 49/50] Apply suggestions from code review Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 2 +- Doc/whatsnew/3.10.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index edef84940d0f08..92f76f222b76f9 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -1009,7 +1009,7 @@ Text I/O encoding instead of the user preferred encoding. .. versionchanged:: 3.10 - *encoding* option now supports ``"locale"`` dummy encoding name. + The *encoding* argument now supports the ``"locale"`` dummy encoding name. :class:`TextIOWrapper` provides these data attributes and methods in addition to those from :class:`TextIOBase` and :class:`IOBase`: diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 923720659fcaaa..33be5bd0b54ba3 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -454,7 +454,7 @@ platform and locale dependent. Since UTF-8 is used on most Unix platforms, omitting ``encoding`` option when opening UTF-8 files (e.g. JSON, YAML, TOML, Markdown) is very common bug. For example:: - # BUG: "b" mode or encoding="utf-8" should be used. + # BUG: "rb" mode or encoding="utf-8" should be used. with open("data.json") as f: data = json.laod(f) From c089fd786bf444eb80e4f19115ff37c6081acc92 Mon Sep 17 00:00:00 2001 From: Inada Naoki Date: Wed, 24 Mar 2021 16:50:23 +0900 Subject: [PATCH 50/50] Update Doc/library/io.rst Co-authored-by: CAM Gerlach --- Doc/library/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/io.rst b/Doc/library/io.rst index 92f76f222b76f9..ee7aed8a5c0f78 100644 --- a/Doc/library/io.rst +++ b/Doc/library/io.rst @@ -127,7 +127,7 @@ Additionally, while there is no concrete plan as of yet, Python may change the default text file encoding to UTF-8 in the future. 
Accordingly, it is highly recommended that you specify the encoding -explicitly when opening text files. If you want to use UTF-8, pass +explicitly when opening text files. If you want to use UTF-8, pass ``encoding="utf-8"``. To use the current locale encoding, ``encoding="locale"`` is supported in Python 3.10.