From 541de3590ad6e0ea64c9f59f344ca895aa0ab907 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 14 Nov 2024 15:30:44 +0200 Subject: [PATCH 1/4] gh-67877: Fix memory leaks in terminated RE matching If SRE(match) function terminates abruptly, either because of a signal or because memory allocation fails, allocated SRE_REPEAT blocks might be never released. --- Lib/test/test_re.py | 21 +++ ...4-11-14-22-25-49.gh-issue-67877.G9hw0w.rst | 2 + Modules/_sre/clinic/sre.c.h | 44 +++++- Modules/_sre/sre.c | 142 +++++++++++++++--- Modules/_sre/sre.h | 16 +- Modules/_sre/sre_lib.h | 33 ++-- 6 files changed, 222 insertions(+), 36 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-11-14-22-25-49.gh-issue-67877.G9hw0w.rst diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 7bc702ec89a4a7..af36806bffeaf3 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2681,6 +2681,27 @@ def test_character_set_none(self): self.assertIsNone(re.search(p, s)) self.assertIsNone(re.search('(?s:.)' + p, s)) + def check_interrupt(self, pattern, string, maxcount): + class Interrupt(Exception): + pass + p = re.compile(pattern) + for n in range(maxcount): + p._fail_after(n, Interrupt) + try: + p.match(string) + return n + except Interrupt: + pass + + @unittest.skipUnless(hasattr(re.Pattern, '_fail_after'), 'requires debug build') + def test_memory_leaks(self): + self.check_interrupt(r'(.)*:', 'abc:', 100) + self.check_interrupt(r'([^:])*?:', 'abc:', 100) + self.check_interrupt(r'([^:])*+:', 'abc:', 100) + self.check_interrupt(r'(.){2,4}:', 'abc:', 100) + self.check_interrupt(r'([^:]){2,4}?:', 'abc:', 100) + self.check_interrupt(r'([^:]){2,4}+:', 'abc:', 100) + def get_debug_out(pat): with captured_stdout() as out: diff --git a/Misc/NEWS.d/next/Library/2024-11-14-22-25-49.gh-issue-67877.G9hw0w.rst b/Misc/NEWS.d/next/Library/2024-11-14-22-25-49.gh-issue-67877.G9hw0w.rst new file mode 100644 index 00000000000000..021b4ae2e100bc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-11-14-22-25-49.gh-issue-67877.G9hw0w.rst @@ -0,0 +1,2 @@ +Fix memory leaks when :mod:`regular expression ` matching terminates +abruptly, either because of a signal or because memory allocation fails. diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h index e287f3d5ad3991..87e4785a428468 100644 --- a/Modules/_sre/clinic/sre.c.h +++ b/Modules/_sre/clinic/sre.c.h @@ -985,6 +985,44 @@ PyDoc_STRVAR(_sre_SRE_Pattern___deepcopy____doc__, #define _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF \ {"__deepcopy__", (PyCFunction)_sre_SRE_Pattern___deepcopy__, METH_O, _sre_SRE_Pattern___deepcopy____doc__}, +#if defined(Py_DEBUG) + +PyDoc_STRVAR(_sre_SRE_Pattern__fail_after__doc__, +"_fail_after($self, count, exception, /)\n" +"--\n" +"\n" +"For debugging."); + +#define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF \ + {"_fail_after", _PyCFunction_CAST(_sre_SRE_Pattern__fail_after), METH_FASTCALL, _sre_SRE_Pattern__fail_after__doc__}, + +static PyObject * +_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count, + PyObject *exception); + +static PyObject * +_sre_SRE_Pattern__fail_after(PatternObject *self, PyObject *const *args, Py_ssize_t nargs) +{ + PyObject *return_value = NULL; + int count; + PyObject *exception; + + if (!_PyArg_CheckPositional("_fail_after", nargs, 2, 2)) { + goto exit; + } + count = PyLong_AsInt(args[0]); + if (count == -1 && PyErr_Occurred()) { + goto exit; + } + exception = args[1]; + return_value = _sre_SRE_Pattern__fail_after_impl(self, count, exception); + +exit: + return return_value; +} + +#endif /* defined(Py_DEBUG) */ + PyDoc_STRVAR(_sre_compile__doc__, "compile($module, /, pattern, flags, code, groups, groupindex,\n" " indexgroup)\n" @@ -1474,4 +1512,8 @@ _sre_SRE_Scanner_search(ScannerObject *self, PyTypeObject *cls, PyObject *const } return _sre_SRE_Scanner_search_impl(self, cls); } -/*[clinic end generated code: output=afaa301d55957cb0 input=a9049054013a1b77]*/ + +#ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF + #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF +#endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */ +/*[clinic end generated code: output=f8cb77f2261f0b2e input=a9049054013a1b77]*/ diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 2c86f8869d8e58..d640f63874bc11 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -267,6 +267,48 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size) return 0; } +LOCAL(SRE_REPEAT *) +add_repeat(SRE_STATE* state, const SRE_CODE* pattern) +{ + SRE_REPEAT *rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(SRE_REPEAT)); + if (!rep) { + return NULL; + } + rep->count = -1; + rep->last_ptr = NULL; + rep->prevlink = NULL; + if (state->repeats) { + rep->nextlink = state->repeats; + state->repeats->prevlink = rep; + } + else { + rep->nextlink = NULL; + } + state->repeats = rep; + + rep->pattern = pattern; + rep->prev = state->repeat; + state->repeat = rep; + return rep; +} + +LOCAL(void) +remove_repeat(SRE_STATE* state, SRE_REPEAT *rep) +{ + state->repeat = rep->prev; + + if (rep->nextlink) { + rep->nextlink->prevlink = rep->prevlink; + } + if (rep->prevlink) { + rep->prevlink->nextlink = rep->nextlink; + } + if (state->repeats == rep) { + state->repeats = rep->nextlink; + } + PyMem_Free(rep); +} + /* generate 8-bit version */ #define SRE_CHAR Py_UCS1 @@ -397,6 +439,18 @@ _sre_unicode_tolower_impl(PyObject *module, int character) return sre_lower_unicode(character); } +LOCAL(void) +state_clean_repeat_data(SRE_STATE* state) +{ + SRE_REPEAT *rep = state->repeats; + state->repeats = NULL; + while (rep) { + SRE_REPEAT *prev = rep->prevlink; + PyMem_Free(rep); + rep = prev; + } +} + LOCAL(void) state_reset(SRE_STATE* state) { @@ -406,8 +460,7 @@ state_reset(SRE_STATE* state) state->lastmark = -1; state->lastindex = -1; - state->repeat = NULL; - + state_clean_repeat_data(state); data_stack_dealloc(state); } @@ -511,6 +564,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->pos = start; state->endpos = end; +#ifdef Py_DEBUG + state->fail_after_count = pattern->fail_after_count; + state->fail_after_exc = pattern->fail_after_exc; // borrowed ref +#endif + return string; err: /* We add an explicit cast here because MSVC has a bug when @@ -524,15 +582,21 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, } LOCAL(void) -state_fini(SRE_STATE* state) +state_fini(SRE_STATE* state, PatternObject *pattern) { if (state->buffer.buf) PyBuffer_Release(&state->buffer); Py_XDECREF(state->string); + state_clean_repeat_data(state); data_stack_dealloc(state); /* See above PyMem_Free() for why we explicitly cast here. */ PyMem_Free((void*) state->mark); state->mark = NULL; +#ifdef Py_DEBUG + if (pattern) { + pattern->fail_after_count = -1; + } +#endif } /* calculate offset from start of string */ @@ -619,6 +683,9 @@ pattern_traverse(PatternObject *self, visitproc visit, void *arg) Py_VISIT(self->groupindex); Py_VISIT(self->indexgroup); Py_VISIT(self->pattern); +#ifdef Py_DEBUG + Py_VISIT(self->fail_after_exc); +#endif return 0; } @@ -628,6 +695,9 @@ pattern_clear(PatternObject *self) Py_CLEAR(self->groupindex); Py_CLEAR(self->indexgroup); Py_CLEAR(self->pattern); +#ifdef Py_DEBUG + Py_CLEAR(self->fail_after_exc); +#endif return 0; } @@ -690,7 +760,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, Py_ssize_t status; PyObject *match; - if (!state_init(&state, (PatternObject *)self, string, pos, endpos)) + if (!state_init(&state, self, string, pos, endpos)) return NULL; INIT_TRACE(&state); @@ -702,12 +772,12 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) { - state_fini(&state); + state_fini(&state, self); return NULL; } match = pattern_new_match(module_state, self, &state, status); - state_fini(&state); + state_fini(&state, self); return match; } @@ -747,12 +817,12 @@ _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) { - state_fini(&state); + state_fini(&state, self); return NULL; } match = pattern_new_match(module_state, self, &state, status); - state_fini(&state); + state_fini(&state, self); return match; } @@ -792,12 +862,12 @@ _sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) { - state_fini(&state); + state_fini(&state, self); return NULL; } match = pattern_new_match(module_state, self, &state, status); - state_fini(&state); + state_fini(&state, self); return match; } @@ -826,7 +896,7 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, list = PyList_New(0); if (!list) { - state_fini(&state); + state_fini(&state, self); return NULL; } @@ -888,12 +958,12 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, state.start = state.ptr; } - state_fini(&state); + state_fini(&state, self); return list; error: Py_DECREF(list); - state_fini(&state); + state_fini(&state, self); return NULL; } @@ -989,7 +1059,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, list = PyList_New(0); if (!list) { - state_fini(&state); + state_fini(&state, self); return NULL; } @@ -1053,12 +1123,12 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, if (status < 0) goto error; - state_fini(&state); + state_fini(&state, self); return list; error: Py_DECREF(list); - state_fini(&state); + state_fini(&state, self); return NULL; } @@ -1185,7 +1255,7 @@ pattern_subx(_sremodulestate* module_state, list = PyList_New(0); if (!list) { Py_DECREF(filter); - state_fini(&state); + state_fini(&state, self); return NULL; } @@ -1271,7 +1341,7 @@ pattern_subx(_sremodulestate* module_state, goto error; } - state_fini(&state); + state_fini(&state, self); Py_DECREF(filter); @@ -1303,7 +1373,7 @@ pattern_subx(_sremodulestate* module_state, error: Py_DECREF(list); - state_fini(&state); + state_fini(&state, self); Py_DECREF(filter); return NULL; @@ -1381,6 +1451,29 @@ _sre_SRE_Pattern___deepcopy__(PatternObject *self, PyObject *memo) return Py_NewRef(self); } +#ifdef Py_DEBUG +/*[clinic input] +_sre.SRE_Pattern._fail_after + + count: int + exception: object + / + +For debugging. +[clinic start generated code]*/ + +static PyObject * +_sre_SRE_Pattern__fail_after_impl(PatternObject *self, int count, + PyObject *exception) +/*[clinic end generated code: output=9a6bf12135ac50c2 input=ef80a45c66c5499d]*/ +{ + self->fail_after_count = count; + Py_INCREF(exception); + Py_XSETREF(self->fail_after_exc, exception); + Py_RETURN_NONE; +} +#endif /* Py_DEBUG */ + static PyObject * pattern_repr(PatternObject *obj) { @@ -1506,6 +1599,11 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, self->pattern = NULL; self->groupindex = NULL; self->indexgroup = NULL; +#ifdef Py_DEBUG + self->fail_after_count = -1; + self->fail_after_exc = NULL; + self->fail_after_exc = Py_NewRef(PyExc_RuntimeError); +#endif self->codesize = n; @@ -2680,7 +2778,7 @@ scanner_dealloc(ScannerObject* self) PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - state_fini(&self->state); + state_fini(&self->state, self->pattern); (void)scanner_clear(self); tp->tp_free(self); Py_DECREF(tp); @@ -2826,7 +2924,8 @@ pattern_scanner(_sremodulestate *module_state, return NULL; } - scanner->pattern = Py_NewRef(self); + Py_INCREF(self); + scanner->pattern = self; PyObject_GC_Track(scanner); return (PyObject*) scanner; @@ -3020,6 +3119,7 @@ static PyMethodDef pattern_methods[] = { _SRE_SRE_PATTERN_SCANNER_METHODDEF _SRE_SRE_PATTERN___COPY___METHODDEF _SRE_SRE_PATTERN___DEEPCOPY___METHODDEF + _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF {"__class_getitem__", Py_GenericAlias, METH_O|METH_CLASS, PyDoc_STR("See PEP 585")}, {NULL, NULL} diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h index 83d89d57b11199..29085f4c029c89 100644 --- a/Modules/_sre/sre.h +++ b/Modules/_sre/sre.h @@ -34,6 +34,11 @@ typedef struct { int flags; /* flags used when compiling pattern source */ PyObject *weakreflist; /* List of weak references */ int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */ +#ifdef Py_DEBUG + /* for simulation of user interruption */ + int fail_after_count; + PyObject *fail_after_exc; +#endif /* pattern code */ Py_ssize_t codesize; SRE_CODE code[1]; @@ -68,6 +73,9 @@ typedef struct SRE_REPEAT_T { const SRE_CODE* pattern; /* points to REPEAT operator arguments */ const void* last_ptr; /* helper to check for infinite loops */ struct SRE_REPEAT_T *prev; /* points to previous repeat context */ + /* links for double-linked list of all repeat contexts */ + struct SRE_REPEAT_T *nextlink; + struct SRE_REPEAT_T *prevlink; } SRE_REPEAT; typedef struct { @@ -95,12 +103,18 @@ typedef struct { size_t data_stack_base; /* current repeat context */ SRE_REPEAT *repeat; + /* linked list of all repeat contexts */ + SRE_REPEAT *repeats; unsigned int sigcount; +#ifdef Py_DEBUG + int fail_after_count; + PyObject *fail_after_exc; +#endif } SRE_STATE; typedef struct { PyObject_HEAD - PyObject* pattern; + PatternObject* pattern; SRE_STATE state; int executing; } ScannerObject; diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 97fbb0a75e54b6..e471c231e55f21 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -560,13 +560,29 @@ typedef struct { Py_ssize_t last_ctx_pos; } SRE(match_context); -#define MAYBE_CHECK_SIGNALS \ +#define _MAYBE_CHECK_SIGNALS \ do { \ if ((0 == (++sigcount & 0xfff)) && PyErr_CheckSignals()) { \ RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ } \ } while (0) +#ifdef Py_DEBUG +# define MAYBE_CHECK_SIGNALS \ + do { \ + _MAYBE_CHECK_SIGNALS; \ + if (state->fail_after_count == 0) { \ + PyErr_SetNone(state->fail_after_exc); \ + RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ + } \ + if (state->fail_after_count > 0) { \ + state->fail_after_count--; \ + } \ + } while (0) +#else +# define MAYBE_CHECK_SIGNALS _MAYBE_CHECK_SIGNALS +#endif /* Py_DEBUG */ + #ifdef HAVE_COMPUTED_GOTOS #ifndef USE_COMPUTED_GOTOS #define USE_COMPUTED_GOTOS 1 @@ -1120,23 +1136,14 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) pattern[1], pattern[2])); /* install new repeat context */ - /* TODO(https://github.com/python/cpython/issues/67877): Fix this - * potential memory leak. */ - ctx->u.rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(*ctx->u.rep)); + ctx->u.rep = add_repeat(state, pattern); if (!ctx->u.rep) { - PyErr_NoMemory(); - RETURN_FAILURE; + RETURN_ERROR(SRE_ERROR_MEMORY); } - ctx->u.rep->count = -1; - ctx->u.rep->pattern = pattern; - ctx->u.rep->prev = state->repeat; - ctx->u.rep->last_ptr = NULL; - state->repeat = ctx->u.rep; state->ptr = ptr; DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); - state->repeat = ctx->u.rep->prev; - PyMem_Free(ctx->u.rep); + remove_repeat(state, ctx->u.rep); if (ret) { RETURN_ON_ERROR(ret); From 41e60ff6208db60766f96c8e4341ecfa82b5da09 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 15 Nov 2024 22:50:46 +0200 Subject: [PATCH 2/4] Use a pool for SRE_REPEAT. --- Lib/test/test_re.py | 1 + Modules/_sre/sre.c | 118 ++++++++++++++++++++++++++--------------- Modules/_sre/sre.h | 11 ++-- Modules/_sre/sre_lib.h | 10 +++- 4 files changed, 90 insertions(+), 50 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index af36806bffeaf3..cc0add39a03579 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2689,6 +2689,7 @@ class Interrupt(Exception): p._fail_after(n, Interrupt) try: p.match(string) + print(n) return n except Interrupt: pass diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index d640f63874bc11..2cdff035e17b7a 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -267,46 +267,83 @@ data_stack_grow(SRE_STATE* state, Py_ssize_t size) return 0; } -LOCAL(SRE_REPEAT *) -add_repeat(SRE_STATE* state, const SRE_CODE* pattern) -{ - SRE_REPEAT *rep = (SRE_REPEAT*) PyMem_Malloc(sizeof(SRE_REPEAT)); - if (!rep) { - return NULL; - } - rep->count = -1; - rep->last_ptr = NULL; - rep->prevlink = NULL; - if (state->repeats) { - rep->nextlink = state->repeats; - state->repeats->prevlink = rep; +/* memory pool functions for SRE_REPEAT, this can avoid memory + leak when SRE(match) function terminates abruptly. + state->repeat_pool_used is a doubly-linked list, so that we + can remove a SRE_REPEAT node from it. + state->repeat_pool_unused is a singly-linked list, we put/get + node at the head. */ +static SRE_REPEAT * +repeat_pool_malloc(SRE_STATE *state) +{ + SRE_REPEAT *repeat; + + if (state->repeat_pool_unused) { + /* remove from unused pool (singly-linked list) */ + repeat = state->repeat_pool_unused; + state->repeat_pool_unused = repeat->pool_next; } else { - rep->nextlink = NULL; + repeat = PyMem_Malloc(sizeof(SRE_REPEAT)); + if (!repeat) { + return NULL; + } + } + + /* add to used pool (doubly-linked list) */ + SRE_REPEAT *temp = state->repeat_pool_used; + if (temp) { + temp->pool_prev = repeat; } - state->repeats = rep; + repeat->pool_prev = NULL; + repeat->pool_next = temp; + state->repeat_pool_used = repeat; - rep->pattern = pattern; - rep->prev = state->repeat; - state->repeat = rep; - return rep; + return repeat; } -LOCAL(void) -remove_repeat(SRE_STATE* state, SRE_REPEAT *rep) +static void +repeat_pool_free(SRE_STATE *state, SRE_REPEAT *repeat) { - state->repeat = rep->prev; + SRE_REPEAT *prev = repeat->pool_prev; + SRE_REPEAT *next = repeat->pool_next; - if (rep->nextlink) { - rep->nextlink->prevlink = rep->prevlink; + /* remove from used pool (doubly-linked list) */ + if (prev) { + prev->pool_next = next; } - if (rep->prevlink) { - rep->prevlink->nextlink = rep->nextlink; + else { + state->repeat_pool_used = next; } - if (state->repeats == rep) { - state->repeats = rep->nextlink; + if (next) { + next->pool_prev = prev; + } + + /* add to unused pool (singly-linked list) */ + repeat->pool_next = state->repeat_pool_unused; + state->repeat_pool_unused = repeat; +} + +static void +repeat_pool_clear(SRE_STATE *state) +{ + /* clear used pool */ + SRE_REPEAT *next = state->repeat_pool_used; + state->repeat_pool_used = NULL; + while (next) { + SRE_REPEAT *temp = next; + next = temp->pool_next; + PyMem_Free(temp); + } + + /* clear unused pool */ + next = state->repeat_pool_unused; + state->repeat_pool_unused = NULL; + while (next) { + SRE_REPEAT *temp = next; + next = temp->pool_next; + PyMem_Free(temp); } - PyMem_Free(rep); } /* generate 8-bit version */ @@ -439,18 +476,6 @@ _sre_unicode_tolower_impl(PyObject *module, int character) return sre_lower_unicode(character); } -LOCAL(void) -state_clean_repeat_data(SRE_STATE* state) -{ - SRE_REPEAT *rep = state->repeats; - state->repeats = NULL; - while (rep) { - SRE_REPEAT *prev = rep->prevlink; - PyMem_Free(rep); - rep = prev; - } -} - LOCAL(void) state_reset(SRE_STATE* state) { @@ -460,7 +485,8 @@ state_reset(SRE_STATE* state) state->lastmark = -1; state->lastindex = -1; - state_clean_repeat_data(state); + state->repeat = NULL; + data_stack_dealloc(state); } @@ -555,6 +581,11 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->must_advance = 0; state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0); + /* SRE_REPEAT pool */ + state->repeat = NULL; + state->repeat_pool_used = NULL; + state->repeat_pool_unused = NULL; + state->beginning = ptr; state->start = (void*) ((char*) ptr + start * state->charsize); @@ -587,11 +618,12 @@ state_fini(SRE_STATE* state, PatternObject *pattern) if (state->buffer.buf) PyBuffer_Release(&state->buffer); Py_XDECREF(state->string); - state_clean_repeat_data(state); data_stack_dealloc(state); /* See above PyMem_Free() for why we explicitly cast here. */ PyMem_Free((void*) state->mark); state->mark = NULL; + /* SRE_REPEAT pool */ + repeat_pool_clear(state); #ifdef Py_DEBUG if (pattern) { pattern->fail_after_count = -1; diff --git a/Modules/_sre/sre.h b/Modules/_sre/sre.h index 29085f4c029c89..42681c2addf3c2 100644 --- a/Modules/_sre/sre.h +++ b/Modules/_sre/sre.h @@ -73,9 +73,9 @@ typedef struct SRE_REPEAT_T { const SRE_CODE* pattern; /* points to REPEAT operator arguments */ const void* last_ptr; /* helper to check for infinite loops */ struct SRE_REPEAT_T *prev; /* points to previous repeat context */ - /* links for double-linked list of all repeat contexts */ - struct SRE_REPEAT_T *nextlink; - struct SRE_REPEAT_T *prevlink; + /* for SRE_REPEAT pool */ + struct SRE_REPEAT_T *pool_prev; + struct SRE_REPEAT_T *pool_next; } SRE_REPEAT; typedef struct { @@ -103,8 +103,9 @@ typedef struct { size_t data_stack_base; /* current repeat context */ SRE_REPEAT *repeat; - /* linked list of all repeat contexts */ - SRE_REPEAT *repeats; + /* SRE_REPEAT pool */ + SRE_REPEAT *repeat_pool_used; + SRE_REPEAT *repeat_pool_unused; unsigned int sigcount; #ifdef Py_DEBUG int fail_after_count; diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index e471c231e55f21..dbec92dd7ee0a9 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1136,14 +1136,20 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) pattern[1], pattern[2])); /* install new repeat context */ - ctx->u.rep = add_repeat(state, pattern); + ctx->u.rep = repeat_pool_malloc(state); if (!ctx->u.rep) { RETURN_ERROR(SRE_ERROR_MEMORY); } + ctx->u.rep->count = -1; + ctx->u.rep->pattern = pattern; + ctx->u.rep->prev = state->repeat; + ctx->u.rep->last_ptr = NULL; + state->repeat = ctx->u.rep; state->ptr = ptr; DO_JUMP(JUMP_REPEAT, jump_repeat, pattern+pattern[0]); - remove_repeat(state, ctx->u.rep); + state->repeat = ctx->u.rep->prev; + repeat_pool_free(state, ctx->u.rep); if (ret) { RETURN_ON_ERROR(ret); From c76a91775fdabcd70bd1fcf7984661704f2f22b7 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 16 Nov 2024 11:12:12 +0200 Subject: [PATCH 3/4] Clean up. --- Lib/test/test_re.py | 5 ++-- Modules/_sre/sre.c | 52 +++++++++++++++++------------------------- Modules/_sre/sre_lib.h | 10 ++++---- 3 files changed, 29 insertions(+), 38 deletions(-) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index cc0add39a03579..1612fc7663e87e 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2686,13 +2686,14 @@ class Interrupt(Exception): pass p = re.compile(pattern) for n in range(maxcount): - p._fail_after(n, Interrupt) try: + p._fail_after(n, Interrupt) p.match(string) - print(n) return n except Interrupt: pass + finally: + p._fail_after(-1, None) @unittest.skipUnless(hasattr(re.Pattern, '_fail_after'), 'requires debug build') def test_memory_leaks(self): diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 2cdff035e17b7a..36f542ddb4df2b 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -581,11 +581,6 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, state->must_advance = 0; state->debug = ((pattern->flags & SRE_FLAG_DEBUG) != 0); - /* SRE_REPEAT pool */ - state->repeat = NULL; - state->repeat_pool_used = NULL; - state->repeat_pool_unused = NULL; - state->beginning = ptr; state->start = (void*) ((char*) ptr + start * state->charsize); @@ -613,7 +608,7 @@ state_init(SRE_STATE* state, PatternObject* pattern, PyObject* string, } LOCAL(void) -state_fini(SRE_STATE* state, PatternObject *pattern) +state_fini(SRE_STATE* state) { if (state->buffer.buf) PyBuffer_Release(&state->buffer); @@ -624,11 +619,6 @@ state_fini(SRE_STATE* state, PatternObject *pattern) state->mark = NULL; /* SRE_REPEAT pool */ repeat_pool_clear(state); -#ifdef Py_DEBUG - if (pattern) { - pattern->fail_after_count = -1; - } -#endif } /* calculate offset from start of string */ @@ -804,12 +794,12 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) { - state_fini(&state, self); + state_fini(&state); return NULL; } match = pattern_new_match(module_state, self, &state, status); - state_fini(&state, self); + state_fini(&state); return match; } @@ -849,12 +839,12 @@ _sre_SRE_Pattern_fullmatch_impl(PatternObject *self, PyTypeObject *cls, TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) { - state_fini(&state, self); + state_fini(&state); return NULL; } match = pattern_new_match(module_state, self, &state, status); - state_fini(&state, self); + state_fini(&state); return match; } @@ -894,12 +884,12 @@ _sre_SRE_Pattern_search_impl(PatternObject *self, PyTypeObject *cls, TRACE(("|%p|%p|END\n", PatternObject_GetCode(self), state.ptr)); if (PyErr_Occurred()) { - state_fini(&state, self); + state_fini(&state); return NULL; } match = pattern_new_match(module_state, self, &state, status); - state_fini(&state, self); + state_fini(&state); return match; } @@ -928,7 +918,7 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, list = PyList_New(0); if (!list) { - state_fini(&state, self); + state_fini(&state); return NULL; } @@ -990,12 +980,12 @@ _sre_SRE_Pattern_findall_impl(PatternObject *self, PyObject *string, state.start = state.ptr; } - state_fini(&state, self); + state_fini(&state); return list; error: Py_DECREF(list); - state_fini(&state, self); + state_fini(&state); return NULL; } @@ -1091,7 +1081,7 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, list = PyList_New(0); if (!list) { - state_fini(&state, self); + state_fini(&state); return NULL; } @@ -1155,12 +1145,12 @@ _sre_SRE_Pattern_split_impl(PatternObject *self, PyObject *string, if (status < 0) goto error; - state_fini(&state, self); + state_fini(&state); return list; error: Py_DECREF(list); - state_fini(&state, self); + state_fini(&state); return NULL; } @@ -1287,7 +1277,7 @@ pattern_subx(_sremodulestate* module_state, list = PyList_New(0); if (!list) { Py_DECREF(filter); - state_fini(&state, self); + state_fini(&state); return NULL; } @@ -1373,7 +1363,7 @@ pattern_subx(_sremodulestate* module_state, goto error; } - state_fini(&state, self); + state_fini(&state); Py_DECREF(filter); @@ -1405,7 +1395,7 @@ pattern_subx(_sremodulestate* module_state, error: Py_DECREF(list); - state_fini(&state, self); + state_fini(&state); Py_DECREF(filter); return NULL; @@ -1634,7 +1624,6 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags, #ifdef Py_DEBUG self->fail_after_count = -1; self->fail_after_exc = NULL; - self->fail_after_exc = Py_NewRef(PyExc_RuntimeError); #endif self->codesize = n; @@ -2734,7 +2723,8 @@ pattern_new_match(_sremodulestate* module_state, if (!match) return NULL; - match->pattern = (PatternObject*)Py_NewRef(pattern); + Py_INCREF(pattern); + match->pattern = pattern; match->string = Py_NewRef(state->string); @@ -2810,7 +2800,7 @@ scanner_dealloc(ScannerObject* self) PyTypeObject *tp = Py_TYPE(self); PyObject_GC_UnTrack(self); - state_fini(&self->state, self->pattern); + state_fini(&self->state); (void)scanner_clear(self); tp->tp_free(self); Py_DECREF(tp); @@ -2870,7 +2860,7 @@ _sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) return NULL; } - match = pattern_new_match(module_state, (PatternObject*) self->pattern, + match = pattern_new_match(module_state, self->pattern, state, status); if (status == 0) @@ -2920,7 +2910,7 @@ _sre_SRE_Scanner_search_impl(ScannerObject *self, PyTypeObject *cls) return NULL; } - match = pattern_new_match(module_state, (PatternObject*) self->pattern, + match = pattern_new_match(module_state, self->pattern, state, status); if (status == 0) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index dbec92dd7ee0a9..efc4424bb3a9ab 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -571,11 +571,11 @@ typedef struct { # define MAYBE_CHECK_SIGNALS \ do { \ _MAYBE_CHECK_SIGNALS; \ - if (state->fail_after_count == 0) { \ - PyErr_SetNone(state->fail_after_exc); \ - RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ - } \ - if (state->fail_after_count > 0) { \ + if (state->fail_after_count >= 0) { \ + if (state->fail_after_count == 0) { \ + PyErr_SetNone(state->fail_after_exc); \ + RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ + } \ state->fail_after_count--; \ } \ } while (0) From 3b4e81a2567a7e842ef42c606704d14ed7e2b13f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 16 Nov 2024 11:54:21 +0200 Subject: [PATCH 4/4] Set state->fail_after_count = -1 after triggering an exception. --- Modules/_sre/sre_lib.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index efc4424bb3a9ab..0c93f5121103e8 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -572,11 +572,10 @@ typedef struct { do { \ _MAYBE_CHECK_SIGNALS; \ if (state->fail_after_count >= 0) { \ - if (state->fail_after_count == 0) { \ + if (state->fail_after_count-- == 0) { \ PyErr_SetNone(state->fail_after_exc); \ RETURN_ERROR(SRE_ERROR_INTERRUPTED); \ } \ - state->fail_after_count--; \ } \ } while (0) #else