From 34d364bb65573cd0248bbaa9e1ae2efe33e3aa70 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Thu, 26 Oct 2023 14:33:44 +0300 Subject: [PATCH 1/2] gh-101955: Fix SystemError in possessive quantifier with alternative and group --- Lib/test/test_re.py | 6 ++++ ...-10-26-16-36-22.gh-issue-101955.Ixu3IF.rst | 2 ++ Modules/_sre/sre_lib.h | 36 +++++++++++++++++++ 3 files changed, 44 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-10-26-16-36-22.gh-issue-101955.Ixu3IF.rst diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 1eca22f45378df..4147a538510ebd 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -2446,6 +2446,12 @@ def test_bug_gh100061(self): self.assertEqual(re.match("(?>(?:ab?c){1,3})", "aca").span(), (0, 2)) self.assertEqual(re.match("(?:ab?c){1,3}+", "aca").span(), (0, 2)) + def test_bug_gh101955(self): + # Possessive quantifier with nested alternative with capture groups + self.assertEqual(re.match('((x)|y|z)*+', 'xyz').groups(), ('z', 'x')) + self.assertEqual(re.match('((x)|y|z){3}+', 'xyz').groups(), ('z', 'x')) + self.assertEqual(re.match('((x)|y|z){3,}+', 'xyz').groups(), ('z', 'x')) + @unittest.skipIf(multiprocessing is None, 'test requires multiprocessing') def test_regression_gh94675(self): pattern = re.compile(r'(?<=[({}])(((//[^\n]*)?[\n])([\000-\040])*)*' diff --git a/Misc/NEWS.d/next/Library/2023-10-26-16-36-22.gh-issue-101955.Ixu3IF.rst b/Misc/NEWS.d/next/Library/2023-10-26-16-36-22.gh-issue-101955.Ixu3IF.rst new file mode 100644 index 00000000000000..89431010f784f8 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-10-26-16-36-22.gh-issue-101955.Ixu3IF.rst @@ -0,0 +1,2 @@ +Fix SystemError when matching a regular expression pattern containing some +combination of possessive quantifier, alternative and capture group. 
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index 92dd725c70fd38..e765c8be43b6cd 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1255,6 +1255,26 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, ptr, pattern[1], pattern[2])); + /* Create repeat context. + * It is only needed to set state->repeat to non-NULL, so nested + * BRANCH will be able to restore marks. */ + if (state->repeat == NULL) { + /* See comment in SRE_OP_REPEAT about potential memory leak. */ + ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep)); + if (!ctx->u.rep) { + PyErr_NoMemory(); + RETURN_FAILURE; + } + ctx->u.rep->count = -1; + ctx->u.rep->pattern = NULL; + ctx->u.rep->prev = state->repeat; + ctx->u.rep->last_ptr = NULL; + state->repeat = ctx->u.rep; + } + else { + ctx->u.rep = NULL; + } + /* Set the global Input pointer to this context's Input pointer */ state->ptr = ptr; @@ -1267,6 +1287,12 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* not enough matches */ DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, &pattern[3]); + if (ret <= 0) { // error or failure + if (ctx->u.rep && !ctx->u.rep->pattern) { + state->repeat = ctx->u.rep->prev; + PyObject_Free(ctx->u.rep); + } + } if (ret) { RETURN_ON_ERROR(ret); ctx->count++; @@ -1317,6 +1343,12 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* Check to see if the last attempted match succeeded. 
*/ + if (ret < 0) { // error + if (ctx->u.rep && !ctx->u.rep->pattern) { + state->repeat = ctx->u.rep->prev; + PyObject_Free(ctx->u.rep); + } + } if (ret) { /* Drop the saved highest number Capture Group marker saved above and use the newly updated @@ -1344,6 +1376,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) break; } } + if (ctx->u.rep && !ctx->u.rep->pattern) { + state->repeat = ctx->u.rep->prev; + PyObject_Free(ctx->u.rep); + } /* Evaluate Tail */ /* Jump to end of pattern indicated by skip, and then skip From 273e2d1431734ffec4667f02106e4ba76f2f4e00 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 18 Nov 2024 12:08:01 +0200 Subject: [PATCH 2/2] Simplify code. Co-authored-by: --- Modules/_sre/sre_lib.h | 54 ++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 36 deletions(-) diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h index cf19697ac04942..af4bfc56083bcb 100644 --- a/Modules/_sre/sre_lib.h +++ b/Modules/_sre/sre_lib.h @@ -1302,30 +1302,21 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) TRACE(("|%p|%p|POSSESSIVE_REPEAT %d %d\n", pattern, ptr, pattern[1], pattern[2])); - /* Create repeat context. - * It is only needed to set state->repeat to non-NULL, so nested - * BRANCH will be able to restore marks. */ - if (state->repeat == NULL) { - /* See comment in SRE_OP_REPEAT about potential memory leak. 
*/ - ctx->u.rep = (SRE_REPEAT*) PyObject_Malloc(sizeof(*ctx->u.rep)); - if (!ctx->u.rep) { - PyErr_NoMemory(); - RETURN_FAILURE; - } - ctx->u.rep->count = -1; - ctx->u.rep->pattern = NULL; - ctx->u.rep->prev = state->repeat; - ctx->u.rep->last_ptr = NULL; - state->repeat = ctx->u.rep; - } - else { - ctx->u.rep = NULL; - } - /* Set the global Input pointer to this context's Input pointer */ state->ptr = ptr; + /* Set state->repeat to non-NULL */ + ctx->u.rep = repeat_pool_malloc(state); + if (!ctx->u.rep) { + RETURN_ERROR(SRE_ERROR_MEMORY); + } + ctx->u.rep->count = -1; + ctx->u.rep->pattern = NULL; + ctx->u.rep->prev = state->repeat; + ctx->u.rep->last_ptr = NULL; + state->repeat = ctx->u.rep; + /* Initialize Count to 0 */ ctx->count = 0; @@ -1334,18 +1325,15 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* not enough matches */ DO_JUMP0(JUMP_POSS_REPEAT_1, jump_poss_repeat_1, &pattern[3]); - if (ret <= 0) { // error or failure - if (ctx->u.rep && !ctx->u.rep->pattern) { - state->repeat = ctx->u.rep->prev; - PyObject_Free(ctx->u.rep); - } - } if (ret) { RETURN_ON_ERROR(ret); ctx->count++; } else { state->ptr = ptr; + /* Restore state->repeat */ + state->repeat = ctx->u.rep->prev; + repeat_pool_free(state, ctx->u.rep); RETURN_FAILURE; } } @@ -1390,12 +1378,6 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) /* Check to see if the last attempted match succeeded. 
*/ - if (ret < 0) { // error - if (ctx->u.rep && !ctx->u.rep->pattern) { - state->repeat = ctx->u.rep->prev; - PyObject_Free(ctx->u.rep); - } - } if (ret) { /* Drop the saved highest number Capture Group marker saved above and use the newly updated @@ -1423,10 +1405,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel) break; } } - if (ctx->u.rep && !ctx->u.rep->pattern) { - state->repeat = ctx->u.rep->prev; - PyObject_Free(ctx->u.rep); - } + + /* Restore state->repeat */ + state->repeat = ctx->u.rep->prev; + repeat_pool_free(state, ctx->u.rep); /* Evaluate Tail */ /* Jump to end of pattern indicated by skip, and then skip