diff --git a/Lib/test/test_pyexpat.py b/Lib/test/test_pyexpat.py index 1d56ccd71cf962..d4b4f60be980a5 100644 --- a/Lib/test/test_pyexpat.py +++ b/Lib/test/test_pyexpat.py @@ -9,12 +9,11 @@ from io import BytesIO from test import support from test.support import os_helper - +from test.support import sortdict +from unittest import mock from xml.parsers import expat from xml.parsers.expat import errors -from test.support import sortdict - class SetAttributeTest(unittest.TestCase): def setUp(self): @@ -436,6 +435,19 @@ def test7(self): "", "4", "", "5", ""], "buffered text not properly split") + def test_change_character_data_handler_in_callback(self): + # Test that xmlparse_handler_setter() properly handles + # the special case "parser.CharacterDataHandler = None". + def handler(*args): + parser.CharacterDataHandler = None + + handler_wrapper = mock.Mock(wraps=handler) + parser = expat.ParserCreate() + parser.CharacterDataHandler = handler_wrapper + parser.Parse(b"12345 ", True) + handler_wrapper.assert_called_once() + self.assertIsNone(parser.CharacterDataHandler) + # Test handling of exception from callback: class HandlerExceptionTest(unittest.TestCase): @@ -595,7 +607,7 @@ def test_unchanged_size(self): def test_disabling_buffer(self): xml1 = b"" + b'a' * 512 xml2 = b'b' * 1024 - xml3 = b'c' * 1024 + b''; + xml3 = b'c' * 1024 + b'' parser = expat.ParserCreate() parser.CharacterDataHandler = self.counting_handler parser.buffer_text = 1 diff --git a/Modules/pyexpat.c b/Modules/pyexpat.c index fa153d86543e99..2a1696ec65666b 100644 --- a/Modules/pyexpat.c +++ b/Modules/pyexpat.c @@ -98,7 +98,11 @@ typedef struct { #define CHARACTER_DATA_BUFFER_SIZE 8192 -typedef const void *xmlhandler; +// A generic function type for storage. +// To avoid undefined behaviors, a handler must be cast to the correct +// function type before it's called; see SETTER_WRAPPER below. +typedef void (*xmlhandler)(void); + typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler); struct HandlerInfo { @@ -110,9 +114,7 @@ struct HandlerInfo { static struct HandlerInfo handler_info[64]; -// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact -// handler API for each handler. -static inline void _Py_NO_SANITIZE_UNDEFINED +static inline void CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info, XML_Parser xml_parser, xmlhandler xml_handler) { @@ -1365,7 +1367,7 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure) elaborate system of handlers and state could remove the C handler more effectively. */ if (handlernum == CharacterData && self->in_callback) { - c_handler = noop_character_data_handler; + c_handler = (xmlhandler)noop_character_data_handler; } v = NULL; } @@ -2222,13 +2224,84 @@ clear_handlers(xmlparseobject *self, int initial) } } +/* To avoid undefined behaviors, a function must be *called* via a function + * pointer of the correct type. + * So, for each `XML_Set*` function, we define a wrapper that calls `XML_Set*` + * with its argument cast to the appropriate type. + */ + +typedef void (*parser_only)(void *); +typedef int (*not_standalone)(void *); +typedef void (*parser_and_data)(void *, const XML_Char *); +typedef void (*parser_and_data_and_int)(void *, const XML_Char *, int); +typedef void (*parser_and_data_and_data)( + void *, const XML_Char *, const XML_Char *); +typedef void (*start_element)(void *, const XML_Char *, const XML_Char **); +typedef void (*element_decl)(void *, const XML_Char *, XML_Content *); +typedef void (*xml_decl)( + void *, const XML_Char *, const XML_Char *, int); +typedef void (*start_doctype_decl)( + void *, const XML_Char *, const XML_Char *, const XML_Char *, int); +typedef void (*notation_decl)( + void *, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef void (*attlist_decl)( + void *, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *, + int); +typedef void (*unparsed_entity_decl)( + void *, + const XML_Char *, const XML_Char *, + const XML_Char *, const XML_Char *, const XML_Char *); +typedef void (*entity_decl)( + void *, + const XML_Char *, int, + const XML_Char *, int, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); +typedef int (*external_entity_ref)( + XML_Parser, + const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *); + +#define SETTER_WRAPPER(NAME, TYPE) \ + static inline void \ + my_Set ## NAME (XML_Parser parser, xmlhandler handler) \ + { \ + (void)XML_Set ## NAME (parser, (TYPE)handler); \ + } + +SETTER_WRAPPER(StartElementHandler, start_element) +SETTER_WRAPPER(EndElementHandler, parser_and_data) +SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data) +SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int) +SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl) +SETTER_WRAPPER(NotationDeclHandler, notation_decl) +SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data) +SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data) +SETTER_WRAPPER(CommentHandler, parser_and_data) +SETTER_WRAPPER(StartCdataSectionHandler, parser_only) +SETTER_WRAPPER(EndCdataSectionHandler, parser_only) +SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int) +SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int) +SETTER_WRAPPER(NotStandaloneHandler, not_standalone) +SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref) +SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl) +SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only) +SETTER_WRAPPER(EntityDeclHandler, entity_decl) +SETTER_WRAPPER(XmlDeclHandler, xml_decl) +SETTER_WRAPPER(ElementDeclHandler, element_decl) +SETTER_WRAPPER(AttlistDeclHandler, attlist_decl) +#if XML_COMBINED_VERSION >= 19504 +SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int) +#endif +#undef SETTER_WRAPPER + static struct HandlerInfo handler_info[] = { // The cast to `xmlhandlersetter` is needed as the signature of XML // handler functions is not compatible with `xmlhandlersetter` since // their second parameter is narrower than a `const void *`. #define HANDLER_INFO(name) \ - {#name, (xmlhandlersetter)XML_Set##name, my_##name}, + {#name, (xmlhandlersetter)my_Set##name, (xmlhandler)my_##name}, HANDLER_INFO(StartElementHandler) HANDLER_INFO(EndElementHandler)