diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py
index 1d56ccd71cf962..d4b4f60be980a5 100644
--- a/Lib/test/test_pyexpat.py
+++ b/Lib/test/test_pyexpat.py
@@ -9,12 +9,11 @@
from io import BytesIO
from test import support
from test.support import os_helper
-
+from test.support import sortdict
+from unittest import mock
from xml.parsers import expat
from xml.parsers.expat import errors
-from test.support import sortdict
-
class SetAttributeTest(unittest.TestCase):
def setUp(self):
@@ -436,6 +435,19 @@ def test7(self):
"", "4", "", "5", ""],
"buffered text not properly split")
+ def test_change_character_data_handler_in_callback(self):
+ # Test that xmlparse_handler_setter() properly handles
+ # the special case "parser.CharacterDataHandler = None".
+ def handler(*args):
+ parser.CharacterDataHandler = None
+
+ handler_wrapper = mock.Mock(wraps=handler)
+ parser = expat.ParserCreate()
+ parser.CharacterDataHandler = handler_wrapper
+ parser.Parse(b"12345 ", True)
+ handler_wrapper.assert_called_once()
+ self.assertIsNone(parser.CharacterDataHandler)
+
# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
@@ -595,7 +607,7 @@ def test_unchanged_size(self):
def test_disabling_buffer(self):
xml1 = b"" + b'a' * 512
xml2 = b'b' * 1024
- xml3 = b'c' * 1024 + b'';
+ xml3 = b'c' * 1024 + b''
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1
diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c
index fa153d86543e99..2a1696ec65666b 100644
--- a/Modules/pyexpat.c
+++ b/Modules/pyexpat.c
@@ -98,7 +98,11 @@ typedef struct {
#define CHARACTER_DATA_BUFFER_SIZE 8192
-typedef const void *xmlhandler;
+// A generic function type for storage.
+// To avoid undefined behaviors, a handler must be cast to the correct
+// function type before it's called; see SETTER_WRAPPER below.
+typedef void (*xmlhandler)(void);
+
typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);
struct HandlerInfo {
@@ -110,9 +114,7 @@ struct HandlerInfo {
static struct HandlerInfo handler_info[64];
-// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact
-// handler API for each handler.
-static inline void _Py_NO_SANITIZE_UNDEFINED
+static inline void
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
XML_Parser xml_parser, xmlhandler xml_handler)
{
@@ -1365,7 +1367,7 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
elaborate system of handlers and state could remove the
C handler more effectively. */
if (handlernum == CharacterData && self->in_callback) {
- c_handler = noop_character_data_handler;
+ c_handler = (xmlhandler)noop_character_data_handler;
}
v = NULL;
}
@@ -2222,13 +2224,84 @@ clear_handlers(xmlparseobject *self, int initial)
}
}
+/* To avoid undefined behaviors, a function must be *called* via a function
+ * pointer of the correct type.
+ * So, for each `XML_Set*` function, we define a wrapper that calls `XML_Set*`
+ * with its argument cast to the appropriate type.
+ */
+
+typedef void (*parser_only)(void *);
+typedef int (*not_standalone)(void *);
+typedef void (*parser_and_data)(void *, const XML_Char *);
+typedef void (*parser_and_data_and_int)(void *, const XML_Char *, int);
+typedef void (*parser_and_data_and_data)(
+ void *, const XML_Char *, const XML_Char *);
+typedef void (*start_element)(void *, const XML_Char *, const XML_Char **);
+typedef void (*element_decl)(void *, const XML_Char *, XML_Content *);
+typedef void (*xml_decl)(
+ void *, const XML_Char *, const XML_Char *, int);
+typedef void (*start_doctype_decl)(
+ void *, const XML_Char *, const XML_Char *, const XML_Char *, int);
+typedef void (*notation_decl)(
+ void *,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
+typedef void (*attlist_decl)(
+ void *,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *,
+ int);
+typedef void (*unparsed_entity_decl)(
+ void *,
+ const XML_Char *, const XML_Char *,
+ const XML_Char *, const XML_Char *, const XML_Char *);
+typedef void (*entity_decl)(
+ void *,
+ const XML_Char *, int,
+ const XML_Char *, int,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
+typedef int (*external_entity_ref)(
+ XML_Parser,
+ const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
+
+#define SETTER_WRAPPER(NAME, TYPE) \
+ static inline void \
+ my_Set ## NAME (XML_Parser parser, xmlhandler handler) \
+ { \
+ (void)XML_Set ## NAME (parser, (TYPE)handler); \
+ }
+
+SETTER_WRAPPER(StartElementHandler, start_element)
+SETTER_WRAPPER(EndElementHandler, parser_and_data)
+SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
+SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
+SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
+SETTER_WRAPPER(NotationDeclHandler, notation_decl)
+SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
+SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
+SETTER_WRAPPER(CommentHandler, parser_and_data)
+SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
+SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
+SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
+SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
+SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
+SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
+SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
+SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
+SETTER_WRAPPER(EntityDeclHandler, entity_decl)
+SETTER_WRAPPER(XmlDeclHandler, xml_decl)
+SETTER_WRAPPER(ElementDeclHandler, element_decl)
+SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
+#if XML_COMBINED_VERSION >= 19504
+SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
+#endif
+#undef SETTER_WRAPPER
+
static struct HandlerInfo handler_info[] = {
// The cast to `xmlhandlersetter` is needed as the signature of XML
// handler functions is not compatible with `xmlhandlersetter` since
// their second parameter is narrower than a `const void *`.
#define HANDLER_INFO(name) \
- {#name, (xmlhandlersetter)XML_Set##name, my_##name},
+ {#name, (xmlhandlersetter)my_Set##name, (xmlhandler)my_##name},
HANDLER_INFO(StartElementHandler)
HANDLER_INFO(EndElementHandler)