Address review

python · pablogsal · Jul 16, 2025 · Jun 6, 2025 · Jun 7, 2025 · Jun 7, 2025
commit fdc81d612ea240e53df787e703097346b6d533fa
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
@@ -1657,6 +1657,12 @@ def __repr__(self):
         self.assertEqual(f'{" # nooo "=}', '" # nooo "=\' # nooo \'')
         self.assertEqual(f'{" \" # nooo \" "=}', '" \\" # nooo \\" "=\' " # nooo " \'')
 
+        self.assertEqual(f'{ # some comment goes here
+  """hello"""=}',  ' \n  """hello"""=\'hello\'')
+        self.assertEqual(f'{"""# this is not a comment
+        a""" # this is a comment
+        }', '# this is not a comment\n        a')
+
         # These next lines contains tabs.  Backslash escapes don't
         # work in f-strings.
         # patchcheck doesn't like these tabs.  So the only way to test

@@ -121,99 +121,81 @@ set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
     }
     PyObject *res = NULL;
 
-    // Check if there is a # character in the expression
+    // Look for a # character outside of string literals
     int hash_detected = 0;
     int in_string = 0;
+    char quote_char = 0;
     char string_quote = 0;
+
     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
         char ch = tok_mode->last_expr_buffer[i];
-        if (ch == '\\' && i + 1 < tok_mode->last_expr_size - tok_mode->last_expr_end) {
-            // Skip the next character if it's an escape sequence
+
+        // Skip escaped characters
+        if (ch == '\\') {
             i++;
             continue;
         }
+
+        // Handle quotes
         if (ch == '"' || ch == '\'') {
             if (!in_string) {
                 in_string = 1;
-                string_quote = ch;
-            } else if (ch == string_quote) {
-                // Check for triple quotes
-                if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
-                    i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
-                    // Skip the rest of the triple quote
-                    i += 2;
-                }
+                quote_char = ch;
+            }
+            else if (ch == quote_char) {
                 in_string = 0;
             }
-        } else if (ch == '#' && !in_string) {
+            continue;
+        }
+
+        // Check for # outside strings
+        if (ch == '#' && !in_string) {
             hash_detected = 1;
             break;
         }
     }
     // If we found a # character in the expression, we need to handle comments
     if (hash_detected) {
-        // Calculate length of input we need to process
-        Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
-
-        // Allocate buffer for processed result, with room for null terminator
-        char *result = (char *)PyMem_Malloc((input_length + 1) * sizeof(char));
+        // Allocate buffer for processed result
+        char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
         if (!result) {
             return -1;
         }
 
-        // Initialize counters and state
         Py_ssize_t i = 0;  // Input position
         Py_ssize_t j = 0;  // Output position
-        in_string = 0;     // Whether we're currently inside a string
-        string_quote = 0;  // The quote character for current string (' or ")
+        in_string = 0;     // Whether we're in a string
+        string_quote = 0;  // Current string quote char
 
-        // Process each character of input
-        for (i = 0, j = 0; i < input_length; i++) {
+        // Process each character
+        while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
             char ch = tok_mode->last_expr_buffer[i];
 
-            // Handle escape sequences - copy both backslash and next char
-            if (ch == '\\' && i + 1 < input_length) {
-                result[j++] = ch;  // Copy backslash
-                result[j++] = tok_mode->last_expr_buffer[++i];  // Copy escaped char
-                continue;
-            }
-
             // Handle string quotes
             if (ch == '"' || ch == '\'') {
                 if (!in_string) {
-                    // Start of new string
                     in_string = 1;
                     string_quote = ch;
                 } else if (ch == string_quote) {
-                    // Potential end of string - check for triple quotes
-                    if (i > 0 && tok_mode->last_expr_buffer[i-1] == ch &&
-                        i > 1 && tok_mode->last_expr_buffer[i-2] == ch) {
-                        // Found triple quote - copy all three quotes
-                        result[j++] = ch;
-                        result[j++] = ch;
-                        result[j++] = ch;
-                        i += 2;  // Skip the other two quotes
-                        continue;
-                    }
-                    // End of regular string
                     in_string = 0;
                 }
-                result[j++] = ch;  // Copy the quote character
+                result[j++] = ch;
             }
-            // Handle comments - skip everything until newline
+            // Skip comments
             else if (ch == '#' && !in_string) {
-                while (i < input_length && tok_mode->last_expr_buffer[i] != '\0') {
-                    if (tok_mode->last_expr_buffer[i] == '\n') {
-                        result[j++] = tok_mode->last_expr_buffer[i];  // Keep newline
-                        break;
-                    }
-                    i++;  // Skip comment character
+                while (i < tok_mode->last_expr_size - tok_mode->last_expr_end && 
+                       tok_mode->last_expr_buffer[i] != '\n') {
+                    i++;
+                }
+                if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
+                    result[j++] = '\n';
                 }
             }
-            // Copy any other character unchanged
+            // Copy other chars
             else {
                 result[j++] = ch;
             }
+            i++;
         }
 
         result[j] = '\0';  // Null-terminate the result string