Skip to content

gh-133157: remove usage of _Py_NO_SANITIZE_UNDEFINED in pyexpat #135346

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions Lib/test/test_pyexpat.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
from io import BytesIO
from test import support
from test.support import os_helper

from test.support import sortdict
from unittest import mock
from xml.parsers import expat
from xml.parsers.expat import errors

from test.support import sortdict


class SetAttributeTest(unittest.TestCase):
def setUp(self):
Expand Down Expand Up @@ -436,6 +435,19 @@ def test7(self):
"<!--abc-->", "4", "<!--def-->", "5", "</a>"],
"buffered text not properly split")

def test_change_character_data_handler_in_callback(self):
# Test that xmlparse_handler_setter() properly handles
# the special case "parser.CharacterDataHandler = None".
def handler(*args):
parser.CharacterDataHandler = None

handler_wrapper = mock.Mock(wraps=handler)
parser = expat.ParserCreate()
parser.CharacterDataHandler = handler_wrapper
parser.Parse(b"<a>1<b/>2<c></c>3<!--abc-->4<!--def-->5</a> ", True)
handler_wrapper.assert_called_once()
self.assertIsNone(parser.CharacterDataHandler)


# Test handling of exception from callback:
class HandlerExceptionTest(unittest.TestCase):
Expand Down Expand Up @@ -595,7 +607,7 @@ def test_unchanged_size(self):
def test_disabling_buffer(self):
xml1 = b"<?xml version='1.0' encoding='iso8859'?><a>" + b'a' * 512
xml2 = b'b' * 1024
xml3 = b'c' * 1024 + b'</a>';
xml3 = b'c' * 1024 + b'</a>'
parser = expat.ParserCreate()
parser.CharacterDataHandler = self.counting_handler
parser.buffer_text = 1
Expand Down
85 changes: 79 additions & 6 deletions Modules/pyexpat.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,11 @@ typedef struct {

#define CHARACTER_DATA_BUFFER_SIZE 8192

typedef const void *xmlhandler;
// A generic function type for storage.
// To avoid undefined behaviors, a handler must be cast to the correct
// function type before it's called; see SETTER_WRAPPER below.
typedef void (*xmlhandler)(void);

typedef void (*xmlhandlersetter)(XML_Parser self, xmlhandler handler);

struct HandlerInfo {
Expand All @@ -110,9 +114,7 @@ struct HandlerInfo {

static struct HandlerInfo handler_info[64];

// gh-111178: Use _Py_NO_SANITIZE_UNDEFINED, rather than using the exact
// handler API for each handler.
static inline void _Py_NO_SANITIZE_UNDEFINED
static inline void
CALL_XML_HANDLER_SETTER(const struct HandlerInfo *handler_info,
XML_Parser xml_parser, xmlhandler xml_handler)
{
Expand Down Expand Up @@ -1365,7 +1367,7 @@ xmlparse_handler_setter(PyObject *op, PyObject *v, void *closure)
elaborate system of handlers and state could remove the
C handler more effectively. */
if (handlernum == CharacterData && self->in_callback) {
c_handler = noop_character_data_handler;
c_handler = (xmlhandler)noop_character_data_handler;
}
v = NULL;
}
Expand Down Expand Up @@ -2222,13 +2224,84 @@ clear_handlers(xmlparseobject *self, int initial)
}
}

/* To avoid undefined behaviors, a function must be *called* via a function
* pointer of the correct type.
* So, for each `XML_Set*` function, we define a wrapper that calls `XML_Set*`
* with its argument cast to the appropriate type.
*/

typedef void (*parser_only)(void *);
typedef int (*not_standalone)(void *);
typedef void (*parser_and_data)(void *, const XML_Char *);
typedef void (*parser_and_data_and_int)(void *, const XML_Char *, int);
typedef void (*parser_and_data_and_data)(
void *, const XML_Char *, const XML_Char *);
typedef void (*start_element)(void *, const XML_Char *, const XML_Char **);
typedef void (*element_decl)(void *, const XML_Char *, XML_Content *);
typedef void (*xml_decl)(
void *, const XML_Char *, const XML_Char *, int);
typedef void (*start_doctype_decl)(
void *, const XML_Char *, const XML_Char *, const XML_Char *, int);
typedef void (*notation_decl)(
void *,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
typedef void (*attlist_decl)(
void *,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *,
int);
typedef void (*unparsed_entity_decl)(
void *,
const XML_Char *, const XML_Char *,
const XML_Char *, const XML_Char *, const XML_Char *);
typedef void (*entity_decl)(
void *,
const XML_Char *, int,
const XML_Char *, int,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);
typedef int (*external_entity_ref)(
XML_Parser,
const XML_Char *, const XML_Char *, const XML_Char *, const XML_Char *);

#define SETTER_WRAPPER(NAME, TYPE) \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe call it XML_SETTER_WRAPPER?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In a local macro in this file, I don't think it's necessary.

static inline void \
my_Set ## NAME (XML_Parser parser, xmlhandler handler) \
{ \
(void)XML_Set ## NAME (parser, (TYPE)handler); \
}

SETTER_WRAPPER(StartElementHandler, start_element)
SETTER_WRAPPER(EndElementHandler, parser_and_data)
SETTER_WRAPPER(ProcessingInstructionHandler, parser_and_data_and_data)
SETTER_WRAPPER(CharacterDataHandler, parser_and_data_and_int)
SETTER_WRAPPER(UnparsedEntityDeclHandler, unparsed_entity_decl)
SETTER_WRAPPER(NotationDeclHandler, notation_decl)
SETTER_WRAPPER(StartNamespaceDeclHandler, parser_and_data_and_data)
SETTER_WRAPPER(EndNamespaceDeclHandler, parser_and_data)
SETTER_WRAPPER(CommentHandler, parser_and_data)
SETTER_WRAPPER(StartCdataSectionHandler, parser_only)
SETTER_WRAPPER(EndCdataSectionHandler, parser_only)
SETTER_WRAPPER(DefaultHandler, parser_and_data_and_int)
SETTER_WRAPPER(DefaultHandlerExpand, parser_and_data_and_int)
SETTER_WRAPPER(NotStandaloneHandler, not_standalone)
SETTER_WRAPPER(ExternalEntityRefHandler, external_entity_ref)
SETTER_WRAPPER(StartDoctypeDeclHandler, start_doctype_decl)
SETTER_WRAPPER(EndDoctypeDeclHandler, parser_only)
SETTER_WRAPPER(EntityDeclHandler, entity_decl)
SETTER_WRAPPER(XmlDeclHandler, xml_decl)
SETTER_WRAPPER(ElementDeclHandler, element_decl)
SETTER_WRAPPER(AttlistDeclHandler, attlist_decl)
#if XML_COMBINED_VERSION >= 19504
SETTER_WRAPPER(SkippedEntityHandler, parser_and_data_and_int)
#endif
#undef SETTER_WRAPPER

static struct HandlerInfo handler_info[] = {

// The cast to `xmlhandlersetter` is needed as the signature of XML
// handler functions is not compatible with `xmlhandlersetter` since
// their second parameter is narrower than a `const void *`.
#define HANDLER_INFO(name) \
{#name, (xmlhandlersetter)XML_Set##name, my_##name},
{#name, (xmlhandlersetter)my_Set##name, (xmlhandler)my_##name},

HANDLER_INFO(StartElementHandler)
HANDLER_INFO(EndElementHandler)
Expand Down
Loading