From ac620e77858de1c629b46c6a8d446f6979df2a49 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 30 Nov 2022 11:36:06 +0000 Subject: [PATCH 1/2] [3.11] gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893) Automerge-Triggered-By: GH:pablogsal. (cherry picked from commit 417206a05c4545bde96c2bbbea92b53e6cac0d48) Co-authored-by: Pablo Galindo Salgado --- Lib/test/test_source_encoding.py | 12 ++++++++++++ .../2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst | 3 +++ Parser/tokenizer.c | 14 ++++++++++++++ Parser/tokenizer.h | 1 + 4 files changed, 30 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst diff --git a/Lib/test/test_source_encoding.py b/Lib/test/test_source_encoding.py index e357264eb1d165..5fe0f3124444ba 100644 --- a/Lib/test/test_source_encoding.py +++ b/Lib/test/test_source_encoding.py @@ -161,6 +161,18 @@ def test_file_parse_error_multiline(self): finally: os.unlink(TESTFN) + def test_tokenizer_fstring_warning_in_first_line(self): + source = "0b1and 2" + with open(TESTFN, "w") as fd: + fd.write("{}".format(source)) + try: + retcode, stdout, stderr = script_helper.assert_python_ok(TESTFN) + self.assertIn(b"SyntaxWarning: invalid binary litera", stderr) + self.assertEqual(stderr.count(source.encode()), 1) + finally: + os.unlink(TESTFN) + + class AbstractSourceEncodingTest: def test_default_coding(self): diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst b/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst new file mode 100644 index 00000000000000..20cd361affeaa5 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-11-30-11-09-40.gh-issue-99891.9VomwB.rst @@ -0,0 +1,3 @@ +Fix a bug in the tokenizer that could cause infinite recursion when showing +syntax warnings that happen in the first line of the source. Patch by Pablo +Galindo diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index eda38a09a995ae..d6b064d1cee2dd 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -88,6 +88,7 @@ tok_new(void) tok->async_def_nl = 0; tok->interactive_underflow = IUNDERFLOW_NORMAL; tok->str = NULL; + tok->report_warnings = 1; return tok; } @@ -1186,6 +1187,10 @@ indenterror(struct tok_state *tok) static int parser_warn(struct tok_state *tok, PyObject *category, const char *format, ...) { + if (!tok->report_warnings) { + return 0; + } + PyObject *errmsg; va_list vargs; #ifdef HAVE_STDARG_PROTOTYPES @@ -2194,6 +2199,15 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) return encoding; } } +<<<<<<< HEAD +||||||| parent of 417206a05c (gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893)) + struct token token; +======= + struct token token; + // We don't want to report warnings here because it could cause infinite recursion + // if fetching the encoding shows a warning. + tok->report_warnings = 0; +>>>>>>> 417206a05c (gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893)) while (tok->lineno < 2 && tok->done == E_OK) { _PyTokenizer_Get(tok, &p_start, &p_end); } diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h index 0cb665104b2b86..d9a5f457d9c501 100644 --- a/Parser/tokenizer.h +++ b/Parser/tokenizer.h @@ -84,6 +84,7 @@ struct tok_state { NEWLINE token after it. */ /* How to proceed when asked for a new token in interactive mode */ enum interactive_underflow_t interactive_underflow; + int report_warnings; }; extern struct tok_state *_PyTokenizer_FromString(const char *, int); From 974dff69221df4cc0e788cd63c67f459742fbc53 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Salgado Date: Wed, 30 Nov 2022 11:39:49 +0000 Subject: [PATCH 2/2] Update Parser/tokenizer.c --- Parser/tokenizer.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index d6b064d1cee2dd..ca11c7bebb4eb1 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -2199,15 +2199,9 @@ _PyTokenizer_FindEncodingFilename(int fd, PyObject *filename) return encoding; } } -<<<<<<< HEAD -||||||| parent of 417206a05c (gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893)) - struct token token; -======= - struct token token; // We don't want to report warnings here because it could cause infinite recursion // if fetching the encoding shows a warning. tok->report_warnings = 0; ->>>>>>> 417206a05c (gh-99891: Fix infinite recursion in the tokenizer when showing warnings (GH-99893)) while (tok->lineno < 2 && tok->done == E_OK) { _PyTokenizer_Get(tok, &p_start, &p_end); }