From f204fbd88a2ea153461f302fb7340fa84c7a2ea5 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 20 Nov 2023 14:13:31 +0000 Subject: [PATCH 1/2] gh-112243: Don't include comments in f-string debug expressions --- Lib/test/test_fstring.py | 3 + ...-11-20-14-13-02.gh-issue-112243.FKdQnr.rst | 1 + Parser/lexer/lexer.c | 55 +++++++++++++++++-- 3 files changed, 53 insertions(+), 6 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index dd8c2dd628ee13..da0160d2382cc6 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1627,6 +1627,9 @@ def __repr__(self): self.assertEqual(f'X{x = }Y', 'Xx = '+repr(x)+'Y') self.assertEqual(f"sadsd {1 + 1 = :{1 + 1:1d}f}", "sadsd 1 + 1 = 2.000000") + self.assertEqual(f"{1+2 = # my comment + }", '1+2 = \n 3') + # These next lines contains tabs. Backslash escapes don't # work in f-strings. # patchcheck doesn't like these tabs. So the only way to test diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst new file mode 100644 index 00000000000000..d69f29f5c63490 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst @@ -0,0 +1 @@ +Don't include comments in f-string debug expressions. Patch by Pablo Galindo diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index c7134ab868bfbd..be36a748a91572 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -112,13 +112,56 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { if (!tok_mode->f_string_debug || token->metadata) { return 0; } + PyObject *res = NULL; - PyObject *res = PyUnicode_DecodeUTF8( - tok_mode->last_expr_buffer, - tok_mode->last_expr_size - tok_mode->last_expr_end, - NULL - ); - if (!res) { + // Check if there is a # character in the expression + int hash_detected; + for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { + if (tok_mode->last_expr_buffer[i] == '#') { + hash_detected = 1; + break; + } + } + + if (hash_detected) { + Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; + char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); + if (!result) { + return -1; + } + + Py_ssize_t i = 0; + Py_ssize_t j = 0; + + for (i = 0, j = 0; i < input_length; i++) { + if (tok_mode->last_expr_buffer[i] == '#') { + // Skip characters until newline or end of string + while (tok_mode->last_expr_buffer[i] != '\0') { + if (tok_mode->last_expr_buffer[i] == '\n') { + result[j++] = tok_mode->last_expr_buffer[i]; + break; + } + i++; + } + } else { + result[j++] = tok_mode->last_expr_buffer[i]; + } + } + + result[j] = '\0'; // Null-terminate the result string + res = PyUnicode_DecodeUTF8(result, j, NULL); + PyObject_Free(result); + } else { + res = PyUnicode_DecodeUTF8( + tok_mode->last_expr_buffer, + tok_mode->last_expr_size - tok_mode->last_expr_end, + NULL + ); + + } + + + if (!res) { return -1; } token->metadata = res; From 2637415d006ebfceea8c46834eb3bf8d0ba2ce8e Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 20 Nov 2023 14:39:11 +0000 Subject: [PATCH 2/2] fixup! gh-112243: Don't include comments in f-string debug expressions --- Parser/lexer/lexer.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index be36a748a91572..a256cbb5c4cbc0 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -115,7 +115,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { PyObject *res = NULL; // Check if there is a # character in the expression - int hash_detected; + int hash_detected = 0; for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { if (tok_mode->last_expr_buffer[i] == '#') { hash_detected = 1; @@ -125,7 +125,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { if (hash_detected) { Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); + char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); if (!result) { return -1; } @@ -136,7 +136,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { for (i = 0, j = 0; i < input_length; i++) { if (tok_mode->last_expr_buffer[i] == '#') { // Skip characters until newline or end of string - while (tok_mode->last_expr_buffer[i] != '\0') { + while (tok_mode->last_expr_buffer[i] != '\0' && i < input_length) { if (tok_mode->last_expr_buffer[i] == '\n') { result[j++] = tok_mode->last_expr_buffer[i]; break; @@ -157,9 +157,9 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) { tok_mode->last_expr_size - tok_mode->last_expr_end, NULL ); - + } - + if (!res) { return -1;