Skip to content

Commit 904af3d

Browse files
ambv and pablogsal authored
[3.10] bpo-45848: Allow the parser to get error lines from encoded files (GH-29646) (GH-29661)
(cherry picked from commit fdcc46d) Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
1 parent bbe3c57 commit 904af3d

File tree

6 files changed

+49
-11
lines changed

6 files changed

+49
-11
lines changed

.gitignore

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,9 @@ Tools/ssl/win32
134134
# Ignore ./python binary on Unix but still look into ./Python/ directory.
135135
/python
136136
!/Python/
137+
138+
# Artifacts generated by 3.11 lying around when switching branches:
139+
/_bootstrap_python
140+
/Programs/_freeze_module
141+
/Python/deepfreeze/
142+
/Python/frozen_modules/

Include/cpython/pyerrors.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,12 @@ Py_DEPRECATED(3.3) PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create(
185185
Py_ssize_t end,
186186
const char *reason /* UTF-8 encoded string */
187187
);
188+
189+
PyAPI_FUNC(PyObject *) _PyErr_ProgramDecodedTextObject(
190+
PyObject *filename,
191+
int lineno,
192+
const char* encoding);
193+
188194
PyAPI_FUNC(PyObject *) _PyUnicodeTranslateError_Create(
189195
PyObject *object,
190196
Py_ssize_t start,

Lib/test/test_exceptions.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2352,6 +2352,19 @@ def test_encodings(self):
23522352
finally:
23532353
unlink(TESTFN)
23542354

2355+
# Check backwards tokenizer errors
2356+
source = '# -*- coding: ascii -*-\n\n(\n'
2357+
try:
2358+
with open(TESTFN, 'w', encoding='ascii') as testfile:
2359+
testfile.write(source)
2360+
rc, out, err = script_helper.assert_python_failure('-Wd', '-X', 'utf8', TESTFN)
2361+
err = err.decode('utf-8').splitlines()
2362+
2363+
self.assertEqual(err[-3], ' (')
2364+
self.assertEqual(err[-2], ' ^')
2365+
finally:
2366+
unlink(TESTFN)
2367+
23552368
def test_attributes_new_constructor(self):
23562369
args = ("bad.py", 1, 2, "abcdefg", 1, 100)
23572370
the_exception = SyntaxError("bad bad", args)
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Allow the parser to obtain error lines directly from encoded files. Patch by
2+
Pablo Galindo.

Parser/pegen.c

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,12 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
480480
goto error;
481481
}
482482

483-
// PyErr_ProgramTextObject assumes that the text is utf-8 so we cannot call it with a file
484-
// with an arbitrary encoding or otherwise we could get some badly decoded text.
485-
int uses_utf8_codec = (!p->tok->encoding || strcmp(p->tok->encoding, "utf-8") == 0);
486483
if (p->tok->fp_interactive) {
487484
error_line = get_error_line(p, lineno);
488485
}
489-
else if (uses_utf8_codec && p->start_rule == Py_file_input) {
490-
error_line = PyErr_ProgramTextObject(p->tok->filename, (int) lineno);
486+
else if (p->start_rule == Py_file_input) {
487+
error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
488+
(int) lineno, p->tok->encoding);
491489
}
492490

493491
if (!error_line) {
@@ -498,15 +496,18 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
498496
we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
499497
`PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
500498
does not physically exist */
501-
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
499+
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
502500

503501
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
504502
Py_ssize_t size = p->tok->inp - p->tok->buf;
505503
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
506504
}
507-
else {
505+
else if (p->tok->fp == NULL || p->tok->fp == stdin) {
508506
error_line = get_error_line(p, lineno);
509507
}
508+
else {
509+
error_line = PyUnicode_FromStringAndSize("", 0);
510+
}
510511
if (!error_line) {
511512
goto error;
512513
}

Python/errors.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1724,7 +1724,7 @@ PyErr_SyntaxLocationEx(const char *filename, int lineno, int col_offset)
17241724
functionality in tb_displayline() in traceback.c. */
17251725

17261726
static PyObject *
1727-
err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
1727+
err_programtext(PyThreadState *tstate, FILE *fp, int lineno, const char* encoding)
17281728
{
17291729
int i;
17301730
char linebuf[1000];
@@ -1752,7 +1752,11 @@ err_programtext(PyThreadState *tstate, FILE *fp, int lineno)
17521752
fclose(fp);
17531753
if (i == lineno) {
17541754
PyObject *res;
1755-
res = PyUnicode_FromString(linebuf);
1755+
if (encoding != NULL) {
1756+
res = PyUnicode_Decode(linebuf, strlen(linebuf), encoding, "replace");
1757+
} else {
1758+
res = PyUnicode_FromString(linebuf);
1759+
}
17561760
if (res == NULL)
17571761
_PyErr_Clear(tstate);
17581762
return res;
@@ -1778,7 +1782,7 @@ PyErr_ProgramText(const char *filename, int lineno)
17781782
}
17791783

17801784
PyObject *
1781-
PyErr_ProgramTextObject(PyObject *filename, int lineno)
1785+
_PyErr_ProgramDecodedTextObject(PyObject *filename, int lineno, const char* encoding)
17821786
{
17831787
if (filename == NULL || lineno <= 0) {
17841788
return NULL;
@@ -1790,7 +1794,13 @@ PyErr_ProgramTextObject(PyObject *filename, int lineno)
17901794
_PyErr_Clear(tstate);
17911795
return NULL;
17921796
}
1793-
return err_programtext(tstate, fp, lineno);
1797+
return err_programtext(tstate, fp, lineno, encoding);
1798+
}
1799+
1800+
PyObject *
1801+
PyErr_ProgramTextObject(PyObject *filename, int lineno)
1802+
{
1803+
return _PyErr_ProgramDecodedTextObject(filename, lineno, NULL);
17941804
}
17951805

17961806
#ifdef __cplusplus

0 commit comments

Comments
 (0)