diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index dd58e032a8befe..89d425d6e27aa7 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1651,6 +1651,18 @@ def __repr__(self): self.assertEqual(f"{1+2 = # my comment }", '1+2 = \n 3') + self.assertEqual(f'{""" # booo + """=}', '""" # booo\n """=\' # booo\\n \'') + + self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'') + self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'') + + self.assertEqual(f'{ # some comment goes here + """hello"""=}', ' \n """hello"""=\'hello\'') + self.assertEqual(f'{"""# this is not a comment + a""" # this is a comment + }', '# this is not a comment\n a') + # These next lines contains tabs. Backslash escapes don't # work in f-strings. # patchcheck doesn't like these tabs. So the only way to test diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst new file mode 100644 index 00000000000000..9b1f62433b45ed --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst @@ -0,0 +1,3 @@ +Fixed a bug where f-string debug expressions (using =) would incorrectly +strip out parts of strings containing escaped quotes and # characters. Patch +by Pablo Galindo. diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 4d10bccf0a53f2..04c9777cd616ae 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -121,38 +121,81 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { } PyObject *res = NULL; - // Check if there is a # character in the expression + // Look for a # character outside of string literals int hash_detected = 0; + int in_string = 0; + char quote_char = 0; + char string_quote = 0; + for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { + char ch = tok_mode->last_expr_buffer[i]; + + // Skip escaped characters + if (ch == '\\') { + i++; + continue; + } + + // Handle quotes + if (ch == '"' || ch == '\'') { + if (!in_string) { + in_string = 1; + quote_char = ch; + } + else if (ch == quote_char) { + in_string = 0; + } + continue; + } + + // Check for # outside strings + if (ch == '#' && !in_string) { hash_detected = 1; break; } } - + // If we found a # character in the expression, we need to handle comments if (hash_detected) { - Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end; - char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char)); + // Allocate buffer for processed result + char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char)); if (!result) { return -1; } - Py_ssize_t i = 0; - Py_ssize_t j = 0; + Py_ssize_t i = 0; // Input position + Py_ssize_t j = 0; // Output position + in_string = 0; // Whether we're in a string + string_quote = 0; // Current string quote char - for (i = 0, j = 0; i < input_length; i++) { - if (tok_mode->last_expr_buffer[i] == '#') { - // Skip characters until newline or end of string - while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') { - if (tok_mode->last_expr_buffer[i] == '\n') { - result[j++] = tok_mode->last_expr_buffer[i]; - break; - } + // Process each character + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + char ch = tok_mode->last_expr_buffer[i]; + + // Handle string quotes + if (ch == '"' || ch == '\'') { + if (!in_string) { + in_string = 1; + string_quote = ch; + } else if (ch == string_quote) { + in_string = 0; + } + result[j++] = ch; + } + // Skip comments + else if (ch == '#' && !in_string) { + while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && + tok_mode->last_expr_buffer[i] != '\n') { i++; } - } else { - result[j++] = tok_mode->last_expr_buffer[i]; + if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) { + result[j++] = '\n'; + } + } + // Copy other chars + else { + result[j++] = ch; } + i++; } result[j] = '\0'; // Null-terminate the result string @@ -164,11 +207,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) { tok_mode->last_expr_size - tok_mode->last_expr_end, NULL ); - } - - if (!res) { + if (!res) { return -1; } token->metadata = res;