From f204fbd88a2ea153461f302fb7340fa84c7a2ea5 Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Mon, 20 Nov 2023 14:13:31 +0000
Subject: [PATCH 1/2] gh-112243: Don't include comments in f-string debug
 expressions

---
 Lib/test/test_fstring.py                      |  3 +
 ...-11-20-14-13-02.gh-issue-112243.FKdQnr.rst |  1 +
 Parser/lexer/lexer.c                          | 55 +++++++++++++++++--
 3 files changed, 53 insertions(+), 6 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst

diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index dd8c2dd628ee13..da0160d2382cc6 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -1627,6 +1627,9 @@ def __repr__(self):
         self.assertEqual(f'X{x  =  }Y', 'Xx  =  '+repr(x)+'Y')
         self.assertEqual(f"sadsd {1 + 1 =  :{1 + 1:1d}f}", "sadsd 1 + 1 =  2.000000")
 
+        self.assertEqual(f"{1+2 = # my comment
+  }", '1+2 = \n  3')
+
         # These next lines contains tabs.  Backslash escapes don't
         # work in f-strings.
         # patchcheck doesn't like these tabs.  So the only way to test
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst
new file mode 100644
index 00000000000000..d69f29f5c63490
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-11-20-14-13-02.gh-issue-112243.FKdQnr.rst	
@@ -0,0 +1 @@
+Don't include comments in f-string debug expressions. Patch by Pablo Galindo.
diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index c7134ab868bfbd..be36a748a91572 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -112,13 +112,56 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
     if (!tok_mode->f_string_debug || token->metadata) {
         return 0;
     }
+    PyObject *res = NULL;
 
-    PyObject *res = PyUnicode_DecodeUTF8(
-        tok_mode->last_expr_buffer,
-        tok_mode->last_expr_size - tok_mode->last_expr_end,
-        NULL
-    );
-    if (!res) {
+    // Check if there is a # character in the expression
+    int hash_detected;
+    for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
+        if (tok_mode->last_expr_buffer[i] == '#') {
+            hash_detected = 1;
+            break;
+        }
+    }
+
+    if (hash_detected) {
+        Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
+        char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); 
+        if (!result) {
+            return -1;
+        }
+
+        Py_ssize_t i = 0;
+        Py_ssize_t j = 0;
+
+        for (i = 0, j = 0; i < input_length; i++) {
+            if (tok_mode->last_expr_buffer[i] == '#') {
+                // Skip characters until newline or end of string
+                while (tok_mode->last_expr_buffer[i] != '\0') {
+                    if (tok_mode->last_expr_buffer[i] == '\n') {
+                        result[j++] = tok_mode->last_expr_buffer[i];
+                        break;
+                    }
+                    i++;
+                }
+            } else {
+                result[j++] = tok_mode->last_expr_buffer[i];
+            }
+        }
+
+        result[j] = '\0';  // Null-terminate the result string
+        res = PyUnicode_DecodeUTF8(result, j, NULL);
+        PyObject_Free(result);
+    } else {
+        res = PyUnicode_DecodeUTF8(
+            tok_mode->last_expr_buffer,
+            tok_mode->last_expr_size - tok_mode->last_expr_end,
+            NULL
+        );
+    
+    }
+ 
+
+   if (!res) {
         return -1;
     }
     token->metadata = res;

From 2637415d006ebfceea8c46834eb3bf8d0ba2ce8e Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Mon, 20 Nov 2023 14:39:11 +0000
Subject: [PATCH 2/2] fixup! gh-112243: Don't include comments in f-string
 debug expressions

---
 Parser/lexer/lexer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c
index be36a748a91572..a256cbb5c4cbc0 100644
--- a/Parser/lexer/lexer.c
+++ b/Parser/lexer/lexer.c
@@ -115,7 +115,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
     PyObject *res = NULL;
 
     // Check if there is a # character in the expression
-    int hash_detected;
+    int hash_detected = 0;
     for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
         if (tok_mode->last_expr_buffer[i] == '#') {
             hash_detected = 1;
@@ -125,7 +125,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
 
     if (hash_detected) {
         Py_ssize_t input_length = tok_mode->last_expr_size - tok_mode->last_expr_end;
-        char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char)); 
+        char *result = (char *)PyObject_Malloc((input_length + 1) * sizeof(char));
         if (!result) {
             return -1;
         }
@@ -136,7 +136,7 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
         for (i = 0, j = 0; i < input_length; i++) {
             if (tok_mode->last_expr_buffer[i] == '#') {
                 // Skip characters until newline or end of string
-                while (tok_mode->last_expr_buffer[i] != '\0') {
+                while (tok_mode->last_expr_buffer[i] != '\0' && i < input_length) {
                     if (tok_mode->last_expr_buffer[i] == '\n') {
                         result[j++] = tok_mode->last_expr_buffer[i];
                         break;
@@ -157,9 +157,9 @@ set_fstring_expr(struct tok_state* tok, struct token *token, char c) {
             tok_mode->last_expr_size - tok_mode->last_expr_end,
             NULL
         );
-    
+
     }
- 
+
 
    if (!res) {
         return -1;