Skip to content

bpo-40593: Improve syntax errors for invalid characters in source code. #20033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Refactor PyUnicode_IsIdentifier().
  • Loading branch information
serhiy-storchaka committed May 11, 2020
commit 5f3b60ea9d70cc1c547f75593e1eb4d1b51da8a6
57 changes: 19 additions & 38 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -12349,52 +12349,33 @@ _PyUnicode_ScanIdentifier(PyObject *self)
/* Return 1 if the string `self` is a valid identifier, 0 otherwise.

   An empty string is never a valid identifier.

   Two representations are handled:
   - "ready" strings delegate to _PyUnicode_ScanIdentifier() and succeed only
     when the value it returns equals the string length (presumably the
     length of the leading identifier prefix -- confirm against that helper);
   - not-ready (legacy wchar_t) strings are checked character by character. */
int
PyUnicode_IsIdentifier(PyObject *self)
{
    if (PyUnicode_IS_READY(self)) {
        Py_ssize_t i = _PyUnicode_ScanIdentifier(self);
        Py_ssize_t len = PyUnicode_GET_LENGTH(self);
        /* an empty string is not a valid identifier */
        return len && i == len;
    }
    else {
        Py_ssize_t i, len = PyUnicode_GET_SIZE(self);
        if (len == 0) {
            /* an empty string is not a valid identifier */
            return 0;
        }

        const wchar_t *wstr = _PyUnicode_WSTR(self);
        Py_UCS4 ch = wstr[0];
        /* PEP 3131 says that the first character must be in
           XID_Start and subsequent characters in XID_Continue,
           and for the ASCII range, the 2.x rules apply (i.e.
           start with letters and underscore, continue with
           letters, digits, underscore). However, given the current
           definition of XID_Start and XID_Continue, it is sufficient
           to check just for these, except that _ must be allowed
           as starting an identifier. */
        if (!_PyUnicode_IsXidStart(ch) && ch != 0x5F /* LOW LINE */) {
            return 0;
        }

        /* every remaining character must be in XID_Continue */
        for (i = 1; i < len; i++) {
            ch = wstr[i];
            if (!_PyUnicode_IsXidContinue(ch)) {
                return 0;
            }
        }
        return 1;
    }
}

/*[clinic input]
Expand Down