Skip to content

DO NOT MERGE -- Tag strings #103766

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
3 changes: 2 additions & 1 deletion Grammar/python.gram
Original file line number Diff line number Diff line change
Expand Up @@ -796,6 +796,7 @@ slice[expr_ty]:
| a=named_expression { a }

atom[expr_ty]:
| a=NAME b=STRING { _PyPegen_tag_string(p, a, (Token *)b) }
| NAME
| 'True' { _PyAST_Constant(Py_True, NULL, EXTRA) }
| 'False' { _PyAST_Constant(Py_False, NULL, EXTRA) }
Expand Down Expand Up @@ -870,7 +871,7 @@ lambda_param[arg_ty]: a=NAME { _PyAST_arg(a->v.Name.id, NULL, NULL, EXTRA) }
# LITERALS
# ========

strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a) }
strings[expr_ty] (memo): a=STRING+ { _PyPegen_concatenate_strings(p, a, 0) }

list[expr_ty]:
| '[' a=[star_named_expressions] ']' { _PyAST_List(a, Load, EXTRA) }
Expand Down
14 changes: 11 additions & 3 deletions Include/internal/pycore_ast.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Include/internal/pycore_ast_state.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Parser/Python.asdl
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ module Python
| Call(expr func, expr* args, keyword* keywords)
| FormattedValue(expr value, int conversion, expr? format_spec)
| JoinedStr(expr* values)
| TagString(expr tag, expr str)
| Constant(constant value, string? kind)

-- the following expression can appear in assignment context
Expand Down
102 changes: 100 additions & 2 deletions Parser/action_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -869,8 +869,106 @@ _PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
return new_seq;
}

/* Wrap the expression `arg` in a Lambda node.
 *
 * The lambda's parameter list is whatever _PyPegen_empty_arguments(p)
 * produces (presumably an empty parameter list, so the result acts as a
 * thunk around `arg`).  The new node reuses the source location of `arg`
 * and is allocated in the parser's arena.
 *
 * Returns NULL if the arguments node cannot be created; otherwise returns
 * the result of _PyAST_Lambda().
 */
static expr_ty
lambdafy(Parser *p, expr_ty arg)
{
    arguments_ty no_params = _PyPegen_empty_arguments(p);
    if (no_params != NULL) {
        return _PyAST_Lambda(no_params,
                             arg,
                             arg->lineno,
                             arg->col_offset,
                             arg->end_lineno,
                             arg->end_col_offset,
                             p->arena);
    }
    return NULL;
}

/* Build a TagString node from a NAME (`tag`) immediately followed by a
 * STRING token (`tok`).
 *
 * Steps:
 *   1. Require the tag to end exactly where the string token starts;
 *      otherwise raise SyntaxError and return NULL.
 *   2. Parse the single string token through _PyPegen_concatenate_strings()
 *      with tagged=1.
 *   3. If the result is a JoinedStr, replace every FormattedValue item
 *      in place with a 4-element Tuple:
 *          (lambda wrapping the value expression,
 *           Constant holding _PyAST_ExprAsUnicode(value expression),
 *           Constant holding the one-character conversion, or None,
 *           the format spec expression, or None)
 *   4. Return TagString(tag, str) spanning from the start of the tag to
 *      the end of the string.
 *
 * Returns NULL on any failure.
 *
 * NOTE(review): `rawstr` and `uconv` are stored in Constant nodes without
 * being registered with the arena in this function -- confirm whether the
 * helpers return new references that need arena registration to avoid a
 * leak.
 */
expr_ty
_PyPegen_tag_string(Parser *p, expr_ty tag, Token *tok)
{
    if (tag->end_lineno != tok->lineno ||
        tag->end_col_offset != tok->col_offset) {
        RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
                                   tag->end_lineno, tag->end_col_offset,
                                   tok->lineno, tok->col_offset,
                                   "cannot have space between tag and string");
        // Stop here: continuing would build and return a node while an
        // error is already pending.
        return NULL;
    }

    // _PyPegen_concatenate_strings() takes a sequence, so wrap the token.
    asdl_generic_seq *tokens = _Py_asdl_generic_seq_new(1, p->arena);
    if (tokens == NULL) {
        return NULL;
    }
    asdl_seq_SET(tokens, 0, tok);

    expr_ty str = _PyPegen_concatenate_strings(p, (asdl_seq *)tokens, 1);
    if (str == NULL) {
        return NULL;
    }

    if (str->kind == JoinedStr_kind) {
        // Transform FormattedValue items into thunks (for now, tuples)
        asdl_expr_seq *values = str->v.JoinedStr.values;
        int nvalues = asdl_seq_LEN(values);
        expr_ty none = NULL;  // created lazily, shared by all tuples

        for (int i = 0; i < nvalues; i++) {
            expr_ty value = asdl_seq_GET(values, i);
            if (value->kind != FormattedValue_kind) {
                continue;
            }

            if (none == NULL) {
                none = _PyAST_Constant(Py_None, NULL,
                                       str->lineno, str->col_offset,
                                       str->end_lineno, str->end_col_offset,
                                       p->arena);
                if (none == NULL) {
                    return NULL;
                }
            }

            // Element 0: the interpolated expression wrapped in a lambda.
            expr_ty expr = value->v.FormattedValue.value;
            expr_ty lambda = lambdafy(p, expr);
            if (lambda == NULL) {
                return NULL;
            }

            // Element 1: the expression rendered as a string constant.
            constant rawstr = _PyAST_ExprAsUnicode(expr);
            if (rawstr == NULL) {
                return NULL;
            }
            expr_ty raw = _PyAST_Constant(rawstr, NULL,
                                          expr->lineno, expr->col_offset,
                                          expr->end_lineno, expr->end_col_offset,
                                          p->arena);
            if (raw == NULL) {
                return NULL;
            }

            // Element 2: the conversion as a one-character string, or None
            // when the conversion code is negative.
            expr_ty conv = none;
            int conversion = value->v.FormattedValue.conversion;
            if (conversion >= 0) {
                char buf[1];
                buf[0] = (char)conversion;
                constant uconv = _PyUnicode_FromASCII(buf, 1);
                if (uconv == NULL) {
                    return NULL;
                }
                conv = _PyAST_Constant(uconv, NULL,
                                       expr->lineno, expr->col_offset,
                                       expr->end_lineno, expr->end_col_offset,
                                       p->arena);
                if (conv == NULL) {
                    return NULL;
                }
            }

            // Element 3: the format spec expression, or None if absent.
            expr_ty spec = value->v.FormattedValue.format_spec;
            if (spec == NULL) {
                spec = none;
            }

            asdl_expr_seq *elts = _Py_asdl_expr_seq_new(4, p->arena);
            if (elts == NULL) {
                return NULL;
            }
            asdl_seq_SET(elts, 0, lambda);
            asdl_seq_SET(elts, 1, raw);
            asdl_seq_SET(elts, 2, conv);
            asdl_seq_SET(elts, 3, spec);

            expr_ty tuple = _PyAST_Tuple(elts, Load,
                                         value->lineno, value->col_offset,
                                         value->end_lineno, value->end_col_offset,
                                         p->arena);
            if (tuple == NULL) {
                return NULL;
            }
            // Replace the FormattedValue with the tuple in the JoinedStr.
            asdl_seq_SET(values, i, tuple);
        }
    }

    return _PyAST_TagString(tag, str,
                            tag->lineno, tag->col_offset,
                            str->end_lineno, str->end_col_offset,
                            p->arena);
}

expr_ty
_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings, int tagged)
{
Py_ssize_t len = asdl_seq_LEN(strings);
assert(len > 0);
Expand All @@ -893,7 +991,7 @@ _PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
const char *fstr;
Py_ssize_t fstrlen = -1;

if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t) != 0) {
if (_PyPegen_parsestr(p, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen, t, tagged) != 0) {
goto error;
}

Expand Down
30 changes: 29 additions & 1 deletion Parser/parser.c

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Parser/pegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,8 @@ asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
int lineno, int col_offset, int end_lineno,
int end_col_offset, PyArena *arena);
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *, int);
expr_ty _PyPegen_tag_string(Parser *p, expr_ty, Token *);
expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
Expand Down
10 changes: 7 additions & 3 deletions Parser/string_parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t)
string object. Return 0 if no errors occurred. */
int
_PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,
const char **fstr, Py_ssize_t *fstrlen, Token *t)
const char **fstr, Py_ssize_t *fstrlen, Token *t, int tagged)
{
const char *s = PyBytes_AsString(t->bytes);
if (s == NULL) {
Expand All @@ -175,12 +175,16 @@ _PyPegen_parsestr(Parser *p, int *bytesmode, int *rawmode, PyObject **result,

size_t len;
int quote = Py_CHARMASK(*s);
int fmode = 0;
int fmode = tagged;
*bytesmode = 0;
*rawmode = 0;
*rawmode = tagged;
*result = NULL;
*fstr = NULL;
if (Py_ISALPHA(quote)) {
if (tagged) {
RAISE_SYNTAX_ERROR("Cannot combine tag and letter prefix");
return -1;
}
while (!*bytesmode || !*rawmode) {
if (quote == 'b' || quote == 'B') {
quote =(unsigned char)*++s;
Expand Down
2 changes: 1 addition & 1 deletion Parser/string_parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ typedef struct {

void _PyPegen_FstringParser_Init(FstringParser *);
int _PyPegen_parsestr(Parser *, int *, int *, PyObject **,
const char **, Py_ssize_t *, Token *);
const char **, Py_ssize_t *, Token *, int);
int _PyPegen_FstringParser_ConcatFstring(Parser *, FstringParser *, const char **,
const char *, int, int, Token *, Token *,
Token *);
Expand Down
Loading