From ef9d22f49f7bc371d2ec6f22c07fbeb1a816a431 Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Fri, 6 Jun 2025 02:17:57 +0100
Subject: [PATCH 1/3] gh-135148: Correctly handle f/t strings with comments and
 debug expressions

---
 Lib/test/test_fstring.py                      |  6 ++
 ...-06-06-02-24-42.gh-issue-135148.r-t2sC.rst |  3 +
 Parser/lexer/lexer.c                          | 85 ++++++++++++++++---
 3 files changed, 81 insertions(+), 13 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index dd58e032a8befe..fcda09fc58d2c7 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1651,6 +1651,12 @@ def __repr__(self):
         self.assertEqual(f"{1+2 = # my comment
   }", '1+2 = \n  3')
 
+        self.assertEqual(f'{""" # booo
+  """=}', '""" # booo\n  """=\' # booo\\n  \'')
+
+        self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
+        self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
+
         # These next lines contains tabs.  Backslash escapes don't
         # work in f-strings.
         # patchcheck doesn't like these tabs.  So the only way to test
diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
new file mode 100644
index 00000000000000..9b1f62433b45ed
--- /dev/null
+++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-06-06-02-24-42.gh-issue-135148.r-t2sC.rst
@@ -0,0 +1,3 @@
+Fixed a bug where f-string debug expressions (using ``=``) would incorrectly
+strip out parts of string literals containing escaped quotes and ``#``
+characters. Patch by Pablo Galindo.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 4d10bccf0a53f2..bfa98b2fe70d43 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -123,35 +123,96 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
 
     // Check if there is a # character in the expression
     int hash_detected = 0;
+    int in_string = 0;
+    char string_quote = 0;
     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
-        if (tok_mode->last_expr_buffer[i] == '#') {
+        char ch = tok_mode->last_expr_buffer[i];
+        if (ch == '\\' && i + 1 < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+            // Skip the next character if it's an escape sequence
+            i++;
+            continue;
+        }
+        if (ch == '"' || ch == '\'') {
+            if (!in_string) {
+                in_string = 1;
+                string_quote = ch;
+            } else if (ch == string_quote) {
+                // Check for triple quotes
+                if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
+                    i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
+                    // Skip the rest of the triple quote
+                    i += 2;
+                }
+                in_string = 0;
+            }
+        } else if (ch == '#' && !in_string) {
             hash_detected = 1;
             break;
         }
     }
-
+    // If we found a # character in the expression, we need to handle comments
     if (hash_detected) {
+        // Calculate length of input we need to process
         Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
+
+        // Allocate buffer for processed result, with room for null terminator
         char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
         if (!result) {
             return -1;
         }
 
-        Py_ssize_t i = 0;
-        Py_ssize_t j = 0;
+        // Initialize counters and state
+        Py_ssize_t i = 0;  // Input position
+        Py_ssize_t j = 0;  // Output position
+        in_string = 0;     // Whether we're currently inside a string
+        string_quote = 0;  // The quote character for current string (' or ")
 
+        // Process each character of input
         for (i = 0, j = 0; i < input_length; i++) {
-            if (tok_mode->last_expr_buffer[i] == '#') {
-                // Skip characters until newline or end of string
+            char ch = tok_mode->last_expr_buffer[i];
+
+            // Handle escape sequences - copy both backslash and next char
+            if (ch == '\\' && i + 1 < input_length) {
+                result[j++] = ch;  // Copy backslash
+                result[j++] = tok_mode->last_expr_buffer[++i];  // Copy escaped char
+                continue;
+            }
+
+            // Handle string quotes
+            if (ch == '"' || ch == '\'') {
+                if (!in_string) {
+                    // Start of new string
+                    in_string = 1;
+                    string_quote = ch;
+                } else if (ch == string_quote) {
+                    // Potential end of string - check for triple quotes
+                    if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
+                        i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
+                        // Found triple quote - copy all three quotes
+                        result[j++] = ch;
+                        result[j++] = ch;
+                        result[j++] = ch;
+                        i += 2;  // Skip the other two quotes
+                        continue;
+                    }
+                    // End of regular string
+                    in_string = 0;
+                }
+                result[j++] = ch;  // Copy the quote character
+            }
+            // Handle comments - skip everything until newline
+            else if (ch == '#' && !in_string) {
                 while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
                     if (tok_mode->last_expr_buffer[i] == '\n') {
-                        result[j++] = tok_mode->last_expr_buffer[i];
+                        result[j++] = tok_mode->last_expr_buffer[i];  // Keep newline
                         break;
                     }
-                    i++;
+                    i++;  // Skip comment character
                 }
-            } else {
-                result[j++] = tok_mode->last_expr_buffer[i];
+            }
+            // Copy any other character unchanged
+            else {
+                result[j++] = ch;
             }
         }
 
@@ -164,11 +225,9 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
             tok_mode->last_expr_size - tok_mode->last_expr_end,
             NULL
         );
-
     }
 
-
-   if (!res) {
+    if (!res) {
         return -1;
     }
     token->metadata = res;

From fdc81d612ea240e53df787e703097346b6d533fa Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Sat, 7 Jun 2025 02:06:46 +0100
Subject: [PATCH 2/3] Address review

---
 Lib/test/test_fstring.py |  6 +++
 Parser/lexer/lexer.c     | 82 ++++++++++++++++------------------------
 2 files changed, 38 insertions(+), 50 deletions(-)

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index fcda09fc58d2c7..89d425d6e27aa7 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1657,6 +1657,12 @@ def __repr__(self):
         self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
         self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
 
+        self.assertEqual(f'{ # some comment goes here
+  """hello"""=}',  ' \n  """hello"""=\'hello\'')
+        self.assertEqual(f'{"""# this is not a comment
+        a""" # this is a comment
+        }', '# this is not a comment\n        a')
+
         # These next lines contains tabs.  Backslash escapes don't
         # work in f-strings.
         # patchcheck doesn't like these tabs.  So the only way to test
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index bfa98b2fe70d43..2a461ac49e8cbe 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -121,99 +121,81 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     }
     PyObject *res = NULL;
 
-    // Check if there is a # character in the expression
+    // Look for a # character outside of string literals
     int hash_detected = 0;
     int in_string = 0;
+    char quote_char = 0;
     char string_quote = 0;
+    
     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
         char ch = tok_mode->last_expr_buffer[i];
-        if (ch == '\\' && i + 1 < tok_mode->last_expr_size - tok_mode->last_expr_end) {
-            // Skip the next character if it's an escape sequence
+        
+        // Skip escaped characters
+        if (ch == '\\') {
             i++;
             continue;
         }
+        
+        // Handle quotes
         if (ch == '"' || ch == '\'') {
             if (!in_string) {
                 in_string = 1;
-                string_quote = ch;
-            } else if (ch == string_quote) {
-                // Check for triple quotes
-                if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
-                    i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
-                    // Skip the rest of the triple quote
-                    i += 2;
-                }
+                quote_char = ch;
+            }
+            else if (ch == quote_char) {
                 in_string = 0;
             }
-        } else if (ch == '#' && !in_string) {
+            continue;
+        }
+        
+        // Check for # outside strings
+        if (ch == '#' && !in_string) {
             hash_detected = 1;
             break;
         }
     }
     // If we found a # character in the expression, we need to handle comments
     if (hash_detected) {
-        // Calculate length of input we need to process
-        Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
-
-        // Allocate buffer for processed result, with room for null terminator
-        char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
+        // Allocate buffer for processed result
+        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
         if (!result) {
             return -1;
         }
 
-        // Initialize counters and state
         Py_ssize_t i = 0;  // Input position
         Py_ssize_t j = 0;  // Output position
-        in_string = 0;     // Whether we're currently inside a string
-        string_quote = 0;  // The quote character for current string (' or ")
+        in_string = 0;     // Whether we're in a string
+        string_quote = 0;  // Current string quote char
 
-        // Process each character of input
-        for (i = 0, j = 0; i < input_length; i++) {
+        // Process each character
+        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
             char ch = tok_mode->last_expr_buffer[i];
 
-            // Handle escape sequences - copy both backslash and next char
-            if (ch == '\\' && i + 1 < input_length) {
-                result[j++] = ch;  // Copy backslash
-                result[j++] = tok_mode->last_expr_buffer[++i];  // Copy escaped char
-                continue;
-            }
-
             // Handle string quotes
             if (ch == '"' || ch == '\'') {
                 if (!in_string) {
-                    // Start of new string
                     in_string = 1;
                     string_quote = ch;
                 } else if (ch == string_quote) {
-                    // Potential end of string - check for triple quotes
-                    if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
-                        i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
-                        // Found triple quote - copy all three quotes
-                        result[j++] = ch;
-                        result[j++] = ch;
-                        result[j++] = ch;
-                        i += 2;  // Skip the other two quotes
-                        continue;
-                    }
-                    // End of regular string
                     in_string = 0;
                 }
-                result[j++] = ch;  // Copy the quote character
+                result[j++] = ch;
             }
-            // Handle comments - skip everything until newline
+            // Skip comments
             else if (ch == '#' && !in_string) {
-                while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
-                    if (tok_mode->last_expr_buffer[i] == '\n') {
-                        result[j++] = tok_mode->last_expr_buffer[i];  // Keep newline
-                        break;
-                    }
-                    i++;  // Skip comment character
+                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && 
+                       tok_mode->last_expr_buffer[i] != '\n') {
+                    i++;
+                }
+                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+                    result[j++] = '\n';
                 }
             }
-            // Copy any other character unchanged
+            // Copy other chars
             else {
                 result[j++] = ch;
             }
+            i++;
         }
 
         result[j] = '\0';  // Null-terminate the result string

From 3c86cce55143ed98db6c8a51a431a363eecbd4aa Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Sat, 7 Jun 2025 14:33:49 +0100
Subject: [PATCH 3/3] Fix linting

---
 Parser/lexer/lexer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index 2a461ac49e8cbe..04c9777cd616ae 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -126,16 +126,16 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     int in_string = 0;
     char quote_char = 0;
     char string_quote = 0;
-    
+
     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
         char ch = tok_mode->last_expr_buffer[i];
-        
+
         // Skip escaped characters
         if (ch == '\\') {
             i++;
             continue;
         }
-        
+
         // Handle quotes
         if (ch == '"' || ch == '\'') {
             if (!in_string) {
@@ -147,7 +147,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
             }
             continue;
         }
-        
+
         // Check for # outside strings
         if (ch == '#' && !in_string) {
             hash_detected = 1;
@@ -183,7 +183,7 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
             }
             // Skip comments
             else if (ch == '#' && !in_string) {
-                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && 
+                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
                        tok_mode->last_expr_buffer[i] != '\n') {
                     i++;
                 }