Skip to content

Commit 23a3e89

Browse files
[3.13] gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328) (#137345)
gh-137314: Fix incorrect treatment of format specs in raw fstrings (GH-137328)

(cherry picked from commit 0153d82)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent 7adea43 commit 23a3e89

File tree

5 files changed

+59 -17 lines changed

5 files changed

+59 -17 lines changed

Lib/test/test_fstring.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,6 +1821,34 @@ def test_newlines_in_format_specifiers(self):
18211821
for case in valid_cases:
18221822
compile(case, "<string>", "exec")
18231823

1824+
def test_raw_fstring_format_spec(self):
1825+
# Test raw f-string format spec behavior (Issue #137314).
1826+
#
1827+
# Raw f-strings should preserve literal backslashes in format specifications,
1828+
# not interpret them as escape sequences.
1829+
class UnchangedFormat:
1830+
"""Test helper that returns the format spec unchanged."""
1831+
def __format__(self, format):
1832+
return format
1833+
1834+
# Test basic escape sequences
1835+
self.assertEqual(f"{UnchangedFormat():\xFF}", 'ÿ')
1836+
self.assertEqual(rf"{UnchangedFormat():\xFF}", '\\xFF')
1837+
1838+
# Test nested expressions with raw/non-raw combinations
1839+
self.assertEqual(rf"{UnchangedFormat():{'\xFF'}}", 'ÿ')
1840+
self.assertEqual(f"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
1841+
self.assertEqual(rf"{UnchangedFormat():{r'\xFF'}}", '\\xFF')
1842+
1843+
# Test continuation character in format specs
1844+
self.assertEqual(f"""{UnchangedFormat():{'a'\
1845+
'b'}}""", 'ab')
1846+
self.assertEqual(rf"""{UnchangedFormat():{'a'\
1847+
'b'}}""", 'ab')
1848+
1849+
# Test multiple format specs in same raw f-string
1850+
self.assertEqual(rf"{UnchangedFormat():\xFF} {UnchangedFormat():\n}", '\\xFF \\n')
1851+
18241852

18251853
if __name__ == '__main__':
18261854
unittest.main()
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Fixed a regression where raw f-strings incorrectly interpreted
2+
escape sequences in format specifications. Raw f-strings now properly preserve
3+
literal backslashes in format specs, matching the behavior from Python 3.11.
4+
For example, ``rf"{obj:\xFF}"`` now correctly produces ``'\\xFF'`` instead of
5+
``'ÿ'``. Patch by Pablo Galindo.

Parser/action_helpers.c

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#include <Python.h>
22

33
#include "pegen.h"
4+
#include "lexer/state.h"
45
#include "string_parser.h"
56
#include "pycore_runtime.h" // _PyRuntime
67
#include "pycore_pystate.h" // _PyInterpreterState_GET()
@@ -1369,7 +1370,15 @@ expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok) {
13691370
if (PyBytes_AsStringAndSize(tok->bytes, &bstr, &bsize) == -1) {
13701371
return NULL;
13711372
}
1372-
PyObject* str = _PyPegen_decode_string(p, 0, bstr, bsize, tok);
1373+
1374+
// Check if we're inside a raw f-string for format spec decoding
1375+
int is_raw = 0;
1376+
if (INSIDE_FSTRING(p->tok)) {
1377+
tokenizer_mode *mode = TOK_GET_MODE(p->tok);
1378+
is_raw = mode->f_string_raw;
1379+
}
1380+
1381+
PyObject* str = _PyPegen_decode_string(p, is_raw, bstr, bsize, tok);
13731382
if (str == NULL) {
13741383
return NULL;
13751384
}

Parser/lexer/lexer.c

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -22,22 +22,6 @@
2222
|| c == '_'\
2323
|| (c >= 128))
2424

25-
#ifdef Py_DEBUG
26-
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
27-
assert(tok->tok_mode_stack_index >= 0);
28-
assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
29-
return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
30-
}
31-
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
32-
assert(tok->tok_mode_stack_index >= 0);
33-
assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
34-
return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
35-
}
36-
#else
37-
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
38-
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
39-
#endif
40-
4125
#define MAKE_TOKEN(token_type) _PyLexer_token_setup(tok, token, token_type, p_start, p_end)
4226
#define MAKE_TYPE_COMMENT_TOKEN(token_type, col_offset, end_col_offset) (\
4327
_PyLexer_type_comment_token_setup(tok, token, token_type, col_offset, end_col_offset, p_start, p_end))

Parser/lexer/state.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef _PY_LEXER_H_
22
#define _PY_LEXER_H_
33

4+
#include "Python.h"
45
#include "object.h"
56

67
#define MAXINDENT 100 /* Max indentation level */
@@ -138,5 +139,20 @@ void _PyTokenizer_Free(struct tok_state *);
138139
void _PyToken_Free(struct token *);
139140
void _PyToken_Init(struct token *);
140141

142+
#ifdef Py_DEBUG
143+
static inline tokenizer_mode* TOK_GET_MODE(struct tok_state* tok) {
144+
assert(tok->tok_mode_stack_index >= 0);
145+
assert(tok->tok_mode_stack_index < MAXFSTRINGLEVEL);
146+
return &(tok->tok_mode_stack[tok->tok_mode_stack_index]);
147+
}
148+
static inline tokenizer_mode* TOK_NEXT_MODE(struct tok_state* tok) {
149+
assert(tok->tok_mode_stack_index >= 0);
150+
assert(tok->tok_mode_stack_index + 1 < MAXFSTRINGLEVEL);
151+
return &(tok->tok_mode_stack[++tok->tok_mode_stack_index]);
152+
}
153+
#else
154+
#define TOK_GET_MODE(tok) (&(tok->tok_mode_stack[tok->tok_mode_stack_index]))
155+
#define TOK_NEXT_MODE(tok) (&(tok->tok_mode_stack[++tok->tok_mode_stack_index]))
156+
#endif
141157

142158
#endif

0 commit comments

Comments
 (0)