Skip to content

gh-126742: allow to use non-UTF8 exception messages #126746

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 47 commits into from
Dec 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
24eb521
allow to use translated exception messages
picnixz Nov 12, 2024
4c0f85b
Use 'surrogateescape' handler instead of 'strict' handler.
picnixz Nov 12, 2024
f432dc9
fix typos
picnixz Nov 20, 2024
434cf5b
Merge branch 'main' into feat/capi/set-locale-exception-126742
picnixz Nov 20, 2024
48238fc
Allow to set localized error string.
picnixz Nov 20, 2024
a38105b
Use `PyErr_SetLocaleString` when possible
picnixz Nov 20, 2024
714dcb7
Indicate which external functions may return non-UTF8 error strings.
picnixz Nov 20, 2024
ee1f3c6
Merge branch 'main' into feat/capi/set-locale-exception-126742
picnixz Nov 29, 2024
3d71f6b
fix build
picnixz Nov 29, 2024
1a23bf5
update dll
picnixz Nov 29, 2024
16a98a2
add docs & news
picnixz Nov 29, 2024
5a150d2
update comment
picnixz Nov 29, 2024
074bd4c
update TODO
picnixz Nov 29, 2024
7f3a909
update comment
picnixz Nov 29, 2024
1a321f9
Revert "add docs & news"
picnixz Dec 2, 2024
f5813a9
keep one internal helper instead of a public/private API pair
picnixz Dec 2, 2024
84afad2
rename references to `PyErr_SetLocaleString`
picnixz Dec 2, 2024
166a736
Simplify logic
picnixz Dec 2, 2024
7c9a793
Merge branch 'main' into fix/locale-set-object-exception-126742
picnixz Dec 2, 2024
314c9f1
Remove entry from refcounts.dat.
picnixz Dec 2, 2024
571de05
update comment
picnixz Dec 2, 2024
2633d18
Update Include/internal/pycore_pyerrors.h
picnixz Dec 2, 2024
45442c3
fix grammar
picnixz Dec 2, 2024
6bedc4b
Merge branch 'main' into fix/locale-set-object-exception-126742
picnixz Dec 3, 2024
6f9ee0b
Unconditionally re-raise decoding errors
picnixz Dec 8, 2024
b02a715
add tests for `_PyErr_SetLocaleString`
picnixz Dec 9, 2024
47d50b8
add tests for `dlerror` and `gdbm_*` functions
picnixz Dec 9, 2024
574184c
fix tests
picnixz Dec 12, 2024
4fecd76
remove C API internal tests
picnixz Dec 12, 2024
26f074c
cosmetic changes on imports
picnixz Dec 12, 2024
cb4d5e4
fix tests
picnixz Dec 13, 2024
6798f21
fix `dbm` tests
picnixz Dec 14, 2024
ce8ae02
improve test coverage
picnixz Dec 14, 2024
8d442a3
fix macOS tests
picnixz Dec 14, 2024
f06d6a5
update names
picnixz Dec 14, 2024
1c05250
fix tests
picnixz Dec 14, 2024
3d69de1
fix tests (again???)
picnixz Dec 14, 2024
a079511
fix tests (macOS)
picnixz Dec 14, 2024
8618de8
fix tests (windows)
picnixz Dec 14, 2024
9bc4012
fix macOS tests
picnixz Dec 14, 2024
1ea0f73
fix windows tests
picnixz Dec 14, 2024
09d60f4
revert a missing line
picnixz Dec 15, 2024
a5722b1
revert changes
picnixz Dec 15, 2024
5fc2e17
use libc.so instead
picnixz Dec 16, 2024
62db957
address Victor's review
picnixz Dec 16, 2024
c665a2d
Fix tests (hopefully it will still work)
picnixz Dec 16, 2024
765e16c
Reverting last commit to use `run_with_locale` only
picnixz Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Include/internal/pycore_pyerrors.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,18 @@ PyAPI_FUNC(void) _PyErr_SetString(
PyObject *exception,
const char *string);

/*
* Set an exception with the error message decoded from the current locale
* encoding (LC_CTYPE).
*
* Exceptions occurring in decoding take priority over the desired exception.
*
* Exported for '_ctypes' shared extensions.
*/
PyAPI_FUNC(void) _PyErr_SetLocaleString(
PyObject *exception,
const char *string);

PyAPI_FUNC(PyObject*) _PyErr_Format(
PyThreadState *tstate,
PyObject *exception,
Expand Down
80 changes: 70 additions & 10 deletions Lib/test/test_ctypes/test_dlerror.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
import _ctypes
import os
import platform
import sys
import test.support
import unittest
import platform
from ctypes import CDLL, c_int
from ctypes.util import find_library


FOO_C = r"""
#include <unistd.h>
Expand All @@ -26,7 +31,7 @@


@unittest.skipUnless(sys.platform.startswith('linux'),
'Test only valid for Linux')
'test requires GNU IFUNC support')
class TestNullDlsym(unittest.TestCase):
"""GH-126554: Ensure that we catch NULL dlsym return values

Expand All @@ -53,14 +58,6 @@ def test_null_dlsym(self):
import subprocess
import tempfile

# To avoid ImportErrors on Windows, where _ctypes does not have
# dlopen and dlsym,
# import here, i.e., inside the test function.
# The skipUnless('linux') decorator ensures that we're on linux
# if we're executing these statements.
from ctypes import CDLL, c_int
from _ctypes import dlopen, dlsym

retcode = subprocess.call(["gcc", "--version"],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL)
Expand Down Expand Up @@ -111,6 +108,8 @@ def test_null_dlsym(self):
self.assertEqual(os.read(pipe_r, 2), b'OK')

# Case #3: Test 'py_dl_sym' from Modules/_ctypes/callproc.c
dlopen = test.support.get_attribute(_ctypes, 'dlopen')
dlsym = test.support.get_attribute(_ctypes, 'dlsym')
L = dlopen(dstname)
with self.assertRaisesRegex(OSError, "symbol 'foo' not found"):
dlsym(L, "foo")
Expand All @@ -119,5 +118,66 @@ def test_null_dlsym(self):
self.assertEqual(os.read(pipe_r, 2), b'OK')


@unittest.skipUnless(os.name != 'nt', 'test requires dlerror() calls')
class TestLocalization(unittest.TestCase):

@staticmethod
def configure_locales(func):
return test.support.run_with_locale(
'LC_ALL',
'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
'fr_FR.utf8', 'en_US.utf8',
'',
)(func)

@classmethod
def setUpClass(cls):
cls.libc_filename = find_library("c")

@configure_locales
def test_localized_error_from_dll(self):
dll = CDLL(self.libc_filename)
with self.assertRaises(AttributeError) as cm:
dll.this_name_does_not_exist
if sys.platform.startswith('linux'):
# On macOS, the filename is not reported by dlerror().
self.assertIn(self.libc_filename, str(cm.exception))

@configure_locales
def test_localized_error_in_dll(self):
dll = CDLL(self.libc_filename)
with self.assertRaises(ValueError) as cm:
c_int.in_dll(dll, 'this_name_does_not_exist')
if sys.platform.startswith('linux'):
# On macOS, the filename is not reported by dlerror().
self.assertIn(self.libc_filename, str(cm.exception))

@unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
'test requires _ctypes.dlopen()')
@configure_locales
def test_localized_error_dlopen(self):
missing_filename = b'missing\xff.so'
# Depending whether the locale, we may encode '\xff' differently
# but we are only interested in avoiding a UnicodeDecodeError
# when reporting the dlerror() error message which contains
# the localized filename.
filename_pattern = r'missing.*?\.so'
with self.assertRaisesRegex(OSError, filename_pattern):
_ctypes.dlopen(missing_filename, 2)

@unittest.skipUnless(hasattr(_ctypes, 'dlopen'),
'test requires _ctypes.dlopen()')
@unittest.skipUnless(hasattr(_ctypes, 'dlsym'),
'test requires _ctypes.dlsym()')
@configure_locales
def test_localized_error_dlsym(self):
dll = _ctypes.dlopen(self.libc_filename)
with self.assertRaises(OSError) as cm:
_ctypes.dlsym(dll, 'this_name_does_not_exist')
if sys.platform.startswith('linux'):
# On macOS, the filename is not reported by dlerror().
self.assertIn(self.libc_filename, str(cm.exception))


if __name__ == "__main__":
unittest.main()
21 changes: 16 additions & 5 deletions Lib/test/test_dbm_gnu.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from test import support
from test.support import import_helper, cpython_only
gdbm = import_helper.import_module("dbm.gnu") #skip if not supported
import unittest
import os
from test.support.os_helper import TESTFN, TESTFN_NONASCII, unlink, FakePath
import unittest
from test import support
from test.support import cpython_only, import_helper
from test.support.os_helper import (TESTFN, TESTFN_NONASCII, FakePath,
create_empty_file, temp_dir, unlink)

gdbm = import_helper.import_module("dbm.gnu") # skip if not supported

filename = TESTFN

Expand Down Expand Up @@ -205,6 +206,16 @@ def test_clear(self):
self.assertNotIn(k, db)
self.assertEqual(len(db), 0)

@support.run_with_locale(
'LC_ALL',
'fr_FR.iso88591', 'ja_JP.sjis', 'zh_CN.gbk',
'fr_FR.utf8', 'en_US.utf8',
'',
)
def test_localized_error(self):
with temp_dir() as d:
create_empty_file(os.path.join(d, 'test'))
self.assertRaises(gdbm.error, gdbm.open, filename, 'r')


if __name__ == '__main__':
Expand Down
28 changes: 7 additions & 21 deletions Modules/_ctypes/_ctypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -984,15 +984,8 @@ CDataType_in_dll_impl(PyObject *type, PyTypeObject *cls, PyObject *dll,
#ifdef USE_DLERROR
const char *dlerr = dlerror();
if (dlerr) {
PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
if (message) {
PyErr_SetObject(PyExc_ValueError, message);
Py_DECREF(message);
return NULL;
}
// Ignore errors from PyUnicode_DecodeLocale,
// fall back to the generic error below.
PyErr_Clear();
_PyErr_SetLocaleString(PyExc_ValueError, dlerr);
return NULL;
}
#endif
#undef USE_DLERROR
Expand Down Expand Up @@ -3805,21 +3798,14 @@ PyCFuncPtr_FromDll(PyTypeObject *type, PyObject *args, PyObject *kwds)
address = (PPROC)dlsym(handle, name);

if (!address) {
#ifdef USE_DLERROR
#ifdef USE_DLERROR
const char *dlerr = dlerror();
if (dlerr) {
PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
if (message) {
PyErr_SetObject(PyExc_AttributeError, message);
Py_DECREF(ftuple);
Py_DECREF(message);
return NULL;
}
// Ignore errors from PyUnicode_DecodeLocale,
// fall back to the generic error below.
PyErr_Clear();
_PyErr_SetLocaleString(PyExc_AttributeError, dlerr);
Py_DECREF(ftuple);
return NULL;
}
#endif
#endif
PyErr_Format(PyExc_AttributeError, "function '%s' not found", name);
Py_DECREF(ftuple);
return NULL;
Expand Down
38 changes: 18 additions & 20 deletions Modules/_ctypes/callproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1588,10 +1588,11 @@ static PyObject *py_dl_open(PyObject *self, PyObject *args)
Py_XDECREF(name2);
if (!handle) {
const char *errmsg = dlerror();
if (!errmsg)
errmsg = "dlopen() error";
PyErr_SetString(PyExc_OSError,
errmsg);
if (errmsg) {
_PyErr_SetLocaleString(PyExc_OSError, errmsg);
return NULL;
}
PyErr_SetString(PyExc_OSError, "dlopen() error");
return NULL;
}
return PyLong_FromVoidPtr(handle);
Expand All @@ -1604,8 +1605,12 @@ static PyObject *py_dl_close(PyObject *self, PyObject *args)
if (!PyArg_ParseTuple(args, "O&:dlclose", &_parse_voidp, &handle))
return NULL;
if (dlclose(handle)) {
PyErr_SetString(PyExc_OSError,
dlerror());
const char *errmsg = dlerror();
if (errmsg) {
_PyErr_SetLocaleString(PyExc_OSError, errmsg);
return NULL;
}
PyErr_SetString(PyExc_OSError, "dlclose() error");
return NULL;
}
Py_RETURN_NONE;
Expand Down Expand Up @@ -1639,21 +1644,14 @@ static PyObject *py_dl_sym(PyObject *self, PyObject *args)
if (ptr) {
return PyLong_FromVoidPtr(ptr);
}
#ifdef USE_DLERROR
const char *dlerr = dlerror();
if (dlerr) {
PyObject *message = PyUnicode_DecodeLocale(dlerr, "surrogateescape");
if (message) {
PyErr_SetObject(PyExc_OSError, message);
Py_DECREF(message);
return NULL;
}
// Ignore errors from PyUnicode_DecodeLocale,
// fall back to the generic error below.
PyErr_Clear();
#ifdef USE_DLERROR
const char *errmsg = dlerror();
if (errmsg) {
_PyErr_SetLocaleString(PyExc_OSError, errmsg);
return NULL;
}
#endif
#undef USE_DLERROR
#endif
#undef USE_DLERROR
PyErr_Format(PyExc_OSError, "symbol '%s' not found", name);
return NULL;
}
Expand Down
44 changes: 33 additions & 11 deletions Modules/_gdbmmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@
#endif

#include "Python.h"
#include "pycore_pyerrors.h" // _PyErr_SetLocaleString()
#include "gdbm.h"

#include <fcntl.h>
#include <stdlib.h> // free()
#include <stdlib.h> // free()
#include <sys/stat.h>
#include <sys/types.h>

Expand All @@ -33,6 +34,24 @@ get_gdbm_state(PyObject *module)
return (_gdbm_state *)state;
}

/*
* Set the gdbm error obtained by gdbm_strerror(gdbm_errno).
*
* If no error message exists, a generic (UTF-8) error message
* is used instead.
*/
static void
set_gdbm_error(_gdbm_state *state, const char *generic_error)
{
const char *gdbm_errmsg = gdbm_strerror(gdbm_errno);
if (gdbm_errmsg) {
_PyErr_SetLocaleString(state->gdbm_error, gdbm_errmsg);
}
else {
PyErr_SetString(state->gdbm_error, generic_error);
}
}

/*[clinic input]
module _gdbm
class _gdbm.gdbm "gdbmobject *" "&Gdbmtype"
Expand Down Expand Up @@ -91,7 +110,7 @@ newgdbmobject(_gdbm_state *state, const char *file, int flags, int mode)
PyErr_SetFromErrnoWithFilename(state->gdbm_error, file);
}
else {
PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
set_gdbm_error(state, "gdbm_open() error");
}
Py_DECREF(dp);
return NULL;
Expand Down Expand Up @@ -136,7 +155,7 @@ gdbm_length(gdbmobject *dp)
PyErr_SetFromErrno(state->gdbm_error);
}
else {
PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
set_gdbm_error(state, "gdbm_count() error");
}
return -1;
}
Expand Down Expand Up @@ -286,7 +305,7 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
PyErr_SetObject(PyExc_KeyError, v);
}
else {
PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
set_gdbm_error(state, "gdbm_delete() error");
}
return -1;
}
Expand All @@ -297,11 +316,12 @@ gdbm_ass_sub(gdbmobject *dp, PyObject *v, PyObject *w)
}
errno = 0;
if (gdbm_store(dp->di_dbm, krec, drec, GDBM_REPLACE) < 0) {
if (errno != 0)
if (errno != 0) {
PyErr_SetFromErrno(state->gdbm_error);
else
PyErr_SetString(state->gdbm_error,
gdbm_strerror(gdbm_errno));
}
else {
set_gdbm_error(state, "gdbm_store() error");
}
return -1;
}
}
Expand Down Expand Up @@ -534,10 +554,12 @@ _gdbm_gdbm_reorganize_impl(gdbmobject *self, PyTypeObject *cls)
check_gdbmobject_open(self, state->gdbm_error);
errno = 0;
if (gdbm_reorganize(self->di_dbm) < 0) {
if (errno != 0)
if (errno != 0) {
PyErr_SetFromErrno(state->gdbm_error);
else
PyErr_SetString(state->gdbm_error, gdbm_strerror(gdbm_errno));
}
else {
set_gdbm_error(state, "gdbm_reorganize() error");
}
return NULL;
}
Py_RETURN_NONE;
Expand Down
1 change: 1 addition & 0 deletions Modules/_hashopenssl.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,7 @@ _setException(PyObject *exc, const char* altmsg, ...)
va_end(vargs);
ERR_clear_error();

/* ERR_ERROR_STRING(3) ensures that the messages below are ASCII */
lib = ERR_lib_error_string(errcode);
func = ERR_func_error_string(errcode);
reason = ERR_reason_error_string(errcode);
Expand Down
1 change: 1 addition & 0 deletions Modules/_sqlite/util.c
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ _pysqlite_seterror(pysqlite_state *state, sqlite3 *db)

/* Create and set the exception. */
int extended_errcode = sqlite3_extended_errcode(db);
// sqlite3_errmsg() always returns an UTF-8 encoded message
const char *errmsg = sqlite3_errmsg(db);
raise_exception(exc_class, extended_errcode, errmsg);
return extended_errcode;
Expand Down
1 change: 1 addition & 0 deletions Modules/_testcapi/exceptions.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "parts.h"
#include "util.h"

#include "clinic/exceptions.c.h"


Expand Down
Loading
Loading