Skip to content

[WIP] gh-89621: re.subn?(): raise error when RegexFlag is wrongly used as count argument #91477

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
"""

import enum
from _sre import _set_RegexFlag_type
from . import _compiler, _parser
import functools

Expand Down Expand Up @@ -152,6 +153,9 @@ class RegexFlag:
__str__ = object.__str__
_numeric_repr_ = hex

# register the type of RegexFlag
_set_RegexFlag_type(RegexFlag)

# sre exception
error = _compiler.error

Expand Down
46 changes: 38 additions & 8 deletions Modules/_sre/clinic/sre.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

82 changes: 73 additions & 9 deletions Modules/_sre/sre.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ typedef struct {
PyTypeObject *Pattern_Type;
PyTypeObject *Match_Type;
PyTypeObject *Scanner_Type;
PyTypeObject *RegexFlag_Type;
} _sremodulestate;

static _sremodulestate *
Expand Down Expand Up @@ -1249,19 +1250,36 @@ _sre.SRE_Pattern.sub
/
repl: object
string: object
count: Py_ssize_t = 0
count: object(c_default="NULL") = 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It could be easier to implement a custom converter (see docs for PyArg_Parse, format unit O&).

    count: object(converter="my_converter_for_ssize_t_except_RegexFlag") = 0


Return the string obtained by replacing the leftmost non-overlapping occurrences of pattern in string by the replacement repl.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_sub_impl(PatternObject *self, PyTypeObject *cls,
PyObject *repl, PyObject *string, Py_ssize_t count)
/*[clinic end generated code: output=4be141ab04bca60d input=d8d1d4ac2311a07c]*/
PyObject *repl, PyObject *string, PyObject *count)
/*[clinic end generated code: output=9f7f5eca541878cb input=8b48c662991e844f]*/
{
/* Implementation of Pattern.sub(): converts the `count` object to a
   Py_ssize_t and delegates to pattern_subx() with a final argument of 0.

   NOTE(review): this listing appears to show removed and added diff lines
   together (two parameter lists, two clinic checksums, two calls to
   pattern_subx); the variant taking `PyObject *count` and passing
   `count_value` looks like the post-change code -- confirm against the
   actual file. */
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
Py_ssize_t count_value;

return pattern_subx(module_state, self, repl, string, count, 0);
/* Some users mistakenly pass flags to count parameter */
/* `count` is declared with c_default="NULL" in the clinic input, so NULL
   here means the argument was omitted; it is treated as 0. */
if (count == NULL) {
count_value = 0;
/* Exact type comparison against the type stored in the module state.
   If no type has been registered the stored pointer is NULL and this
   branch cannot match. */
} else if (Py_TYPE(count) == module_state->RegexFlag_Type) {
PyErr_SetString(PyExc_TypeError,
"count argument should not be RegexFlag.");
return NULL;
} else {
count_value = PyLong_AsSsize_t(count);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Look at the code for the n format unit in getargs.c. You should reproduce it.

/* -1 is also a legitimate converted value, so PyErr_Occurred() is what
   distinguishes a conversion failure. The exception set by the conversion
   is then replaced with a ValueError. */
if (count_value == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"count arguemnt wrong type.");
Comment on lines +1276 to +1277
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just keep the existing error. ValueError is wrong exception for wrong type, and the raised exception can be for example OverflowError.

return NULL;
}
}

return pattern_subx(module_state, self, repl, string, count_value, 0);
}

/*[clinic input]
Expand All @@ -1271,20 +1289,36 @@ _sre.SRE_Pattern.subn
/
repl: object
string: object
count: Py_ssize_t = 0
count: object(c_default="NULL") = 0

Return the tuple (new_string, number_of_subs_made) found by replacing the leftmost non-overlapping occurrences of pattern with the replacement repl.
[clinic start generated code]*/

static PyObject *
_sre_SRE_Pattern_subn_impl(PatternObject *self, PyTypeObject *cls,
PyObject *repl, PyObject *string,
Py_ssize_t count)
/*[clinic end generated code: output=da02fd85258b1e1f input=8b78a65b8302e58d]*/
PyObject *repl, PyObject *string, PyObject *count)
/*[clinic end generated code: output=7d66d9c3696121e6 input=3c0642c0ba657dc1]*/
{
/* Implementation of Pattern.subn(): converts the `count` object to a
   Py_ssize_t and delegates to pattern_subx() with a final argument of 1
   (Pattern.sub() passes 0 in the same position).

   NOTE(review): this listing appears to show removed and added diff lines
   together (two parameter lists, two clinic checksums, two return
   statements); the variant taking `PyObject *count` and passing
   `count_value` looks like the post-change code -- confirm against the
   actual file. */
_sremodulestate *module_state = get_sre_module_state_by_class(cls);
Py_ssize_t count_value;

/* Some users mistakenly pass flags to count parameter */
/* `count` is declared with c_default="NULL" in the clinic input, so NULL
   here means the argument was omitted; it is treated as 0. */
if (count == NULL) {
count_value = 0;
/* Exact type comparison against the type stored in the module state.
   If no type has been registered the stored pointer is NULL and this
   branch cannot match. */
} else if (Py_TYPE(count) == module_state->RegexFlag_Type) {
PyErr_SetString(PyExc_TypeError,
"count argument should not be RegexFlag.");
return NULL;
} else {
count_value = PyLong_AsSsize_t(count);
/* -1 is also a legitimate converted value, so PyErr_Occurred() is what
   distinguishes a conversion failure.
   NOTE(review): the exception set by the conversion (which may be, for
   example, OverflowError) is replaced with a ValueError here; consider
   returning NULL without overwriting it. */
if (count_value == -1 && PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"count arguemnt wrong type.");
return NULL;
}
}

return pattern_subx(module_state, self, repl, string, count, 1);
return pattern_subx(module_state, self, repl, string, count_value, 1);
}

/*[clinic input]
Expand Down Expand Up @@ -2003,6 +2037,32 @@ _validate(PatternObject *self)
return 1;
}

/*[clinic input]
_sre._set_RegexFlag_type

RegexFlag_Type: object

[clinic start generated code]*/

static PyObject *
_sre__set_RegexFlag_type_impl(PyObject *module, PyObject *RegexFlag_Type)
/*[clinic end generated code: output=1276fbd158d1c5b2 input=fa4eabcd03dd9819]*/
{
    /* Store RegexFlag_Type in this module's state, where the sub()/subn()
       implementations compare it against the type of their count argument.

       Returns None on success.  Sets ValueError and returns NULL when the
       argument is not a type object. */
    _sremodulestate *module_state = get_sre_module_state(module);

    if (!PyType_Check(RegexFlag_Type)) {
        PyErr_SetString(PyExc_ValueError,
                        "The argument should be RegexFlag type.");
        return NULL;
    }

    /* Acquire the new strong reference first, then swap it into the module
       state.  Py_XSETREF releases whatever type was stored before (if any),
       so calling this function more than once no longer leaks the previous
       reference. */
    Py_INCREF(RegexFlag_Type);
    Py_XSETREF(module_state->RegexFlag_Type, (PyTypeObject *)RegexFlag_Type);

    Py_RETURN_NONE;
}


/* -------------------------------------------------------------------- */
/* match methods */

Expand Down Expand Up @@ -2951,6 +3011,7 @@ static PyMethodDef _functions[] = {
_SRE_UNICODE_ISCASED_METHODDEF
_SRE_ASCII_TOLOWER_METHODDEF
_SRE_UNICODE_TOLOWER_METHODDEF
_SRE__SET_REGEXFLAG_TYPE_METHODDEF
{NULL, NULL}
};

Expand All @@ -2962,6 +3023,7 @@ sre_traverse(PyObject *module, visitproc visit, void *arg)
Py_VISIT(state->Pattern_Type);
Py_VISIT(state->Match_Type);
Py_VISIT(state->Scanner_Type);
Py_VISIT(state->RegexFlag_Type);

return 0;
}
Expand All @@ -2974,6 +3036,7 @@ sre_clear(PyObject *module)
Py_CLEAR(state->Pattern_Type);
Py_CLEAR(state->Match_Type);
Py_CLEAR(state->Scanner_Type);
Py_CLEAR(state->RegexFlag_Type);

return 0;
}
Expand Down Expand Up @@ -3014,6 +3077,7 @@ sre_exec(PyObject *m)
CREATE_TYPE(m, state->Pattern_Type, &pattern_spec);
CREATE_TYPE(m, state->Match_Type, &match_spec);
CREATE_TYPE(m, state->Scanner_Type, &scanner_spec);
state->RegexFlag_Type = NULL;

if (PyModule_AddIntConstant(m, "MAGIC", SRE_MAGIC) < 0) {
goto error;
Expand Down