From 808fdbbfdc498353a67839242a17941e9a640f02 Mon Sep 17 00:00:00 2001 From: Rayane Chatrieux Date: Thu, 7 Apr 2022 02:32:05 -0400 Subject: [PATCH] py: Implement decorator syntax relaxation from CPython 3.9. py/grammar.h: The grammar file hosts the core change associated with this pull request. The decorator rule is changed to match a namedexpr_test rule rather than a dotted_name rule. However, since the compiler currently relies on the dotted_name rule to easily detect built-in decorators (e.g. @micropython.bytecode), a new way of detecting these built-ins needed to be found: 1. One possibility is to have the grammar try to match a dotted_name first and a namedexpr_test second, but since the parser does not bactrack through lexical tokens, it is not obvious to me how to accomplish this. 2. Another possiblity is to write code in the compiler to visit the AST rooted at the namedexpr_test rule of a given decorator and determine whether said decorator is a built-in that way. This could potentially be an expensive operation and/or lead to a significant increase in code size. 3. [SELECTED] A third option is to introduce a grammar rule built_in_decorator that tries to match a "micropython" string, and backtracks into a namedexpr_test rule if there is no match. This relies on the fact that all built-in decorators start with "micropython". One possible concern is that any decorator starting with the "micropython" NAME token must match with a built-in decorator, otherwise a syntax error is issued. This is, however, congruent with current behavior: this is exactly what the compile_built_in_decorator function would do (before this commit). py/parse.c: Add a check to attempt to match the "micropython" string to a NAME token when parsing a built_in_decrator rule. The parser backtracks out of the rule if there is no match. py/compile.c: Since the compile_built_in_decorator function is not used anywhere else, I took the liberty to simplify this function to match the new grammar. tests/basics: Amend the decorator.py test with additional tests for syntax, type and name errors. Adds an expected output file so that this test works out-of-the-box with older versions of CPython than 3.9. tests/cmdline/cmd_parsetree.py.exp: Update the expected rule IDs in the expected output since the grammar is changed. Signed-off-by: Rayane Chatrieux --- py/compile.c | 55 +++++++++--------------- py/grammar.h | 8 +++- py/parse.c | 5 +++ tests/basics/decorator.py | 68 +++++++++++++++++++++++++++++- tests/basics/decorator.py.exp | 17 ++++++++ tests/cmdline/cmd_parsetree.py.exp | 4 +- 6 files changed, 116 insertions(+), 41 deletions(-) create mode 100644 tests/basics/decorator.py.exp diff --git a/py/compile.c b/py/compile.c index e4ead7f1ac2ac..b97aea10147e2 100644 --- a/py/compile.c +++ b/py/compile.c @@ -857,17 +857,8 @@ STATIC qstr compile_classdef_helper(compiler_t *comp, mp_parse_node_struct_t *pn } // returns true if it was a built-in decorator (even if the built-in had an error) -STATIC bool compile_built_in_decorator(compiler_t *comp, size_t name_len, mp_parse_node_t *name_nodes, uint *emit_options) { - if (MP_PARSE_NODE_LEAF_ARG(name_nodes[0]) != MP_QSTR_micropython) { - return false; - } - - if (name_len != 2) { - compile_syntax_error(comp, name_nodes[0], MP_ERROR_TEXT("invalid micropython decorator")); - return true; - } - - qstr attr = MP_PARSE_NODE_LEAF_ARG(name_nodes[1]); +STATIC bool compile_built_in_decorator(compiler_t *comp, mp_parse_node_t name_node, uint *emit_options) { + qstr attr = MP_PARSE_NODE_LEAF_ARG(name_node); if (attr == MP_QSTR_bytecode) { *emit_options = MP_EMIT_OPT_BYTECODE; #if MICROPY_EMIT_NATIVE @@ -888,17 +879,17 @@ STATIC bool compile_built_in_decorator(compiler_t *comp, size_t name_len, mp_par #endif #endif } else { - compile_syntax_error(comp, name_nodes[1], MP_ERROR_TEXT("invalid micropython decorator")); + compile_syntax_error(comp, name_node, MP_ERROR_TEXT("invalid micropython decorator")); } #if MICROPY_EMIT_NATIVE && MICROPY_DYNAMIC_COMPILER if (*emit_options == MP_EMIT_OPT_NATIVE_PYTHON || *emit_options == MP_EMIT_OPT_VIPER) { if (emit_native_table[mp_dynamic_compiler.native_arch] == NULL) { - compile_syntax_error(comp, name_nodes[1], MP_ERROR_TEXT("invalid arch")); + compile_syntax_error(comp, name_node, MP_ERROR_TEXT("invalid arch")); } } else if (*emit_options == MP_EMIT_OPT_ASM) { if (emit_asm_table[mp_dynamic_compiler.native_arch] == NULL) { - compile_syntax_error(comp, name_nodes[1], MP_ERROR_TEXT("invalid arch")); + compile_syntax_error(comp, name_node, MP_ERROR_TEXT("invalid arch")); } } #endif @@ -920,30 +911,22 @@ STATIC void compile_decorated(compiler_t *comp, mp_parse_node_struct_t *pns) { assert(MP_PARSE_NODE_IS_STRUCT_KIND(nodes[i], PN_decorator)); // should be mp_parse_node_struct_t *pns_decorator = (mp_parse_node_struct_t *)nodes[i]; - // nodes[0] contains the decorator function, which is a dotted name - mp_parse_node_t *name_nodes; - size_t name_len = mp_parse_node_extract_list(&pns_decorator->nodes[0], PN_dotted_name, &name_nodes); - - // check for built-in decorators - if (compile_built_in_decorator(comp, name_len, name_nodes, &emit_options)) { - // this was a built-in - num_built_in_decorators += 1; + assert(MP_PARSE_NODE_STRUCT_NUM_NODES(pns_decorator) == 1); + mp_parse_node_t decorator_arg = pns_decorator->nodes[0]; - } else { - // not a built-in, compile normally - - // compile the decorator function - compile_node(comp, name_nodes[0]); - for (size_t j = 1; j < name_len; j++) { - assert(MP_PARSE_NODE_IS_ID(name_nodes[j])); // should be - EMIT_ARG(attr, MP_PARSE_NODE_LEAF_ARG(name_nodes[j]), MP_EMIT_ATTR_LOAD); - } - - // nodes[1] contains arguments to the decorator function, if any - if (!MP_PARSE_NODE_IS_NULL(pns_decorator->nodes[1])) { - // call the decorator function with the arguments in nodes[1] - compile_node(comp, pns_decorator->nodes[1]); + if (MP_PARSE_NODE_IS_STRUCT_KIND(decorator_arg, PN_built_in_decorator)) { + mp_parse_node_struct_t *decorator_arg_s = (mp_parse_node_struct_t *)decorator_arg; + assert(MP_PARSE_NODE_STRUCT_NUM_NODES(decorator_arg_s) == 2); + // The first child node must be the ID "micropython" (enforced by + // the parser). We just need to have a look at the second child. + mp_parse_node_t name_node = decorator_arg_s->nodes[1]; + if (compile_built_in_decorator(comp, name_node, &emit_options)) { + num_built_in_decorators += 1; } + } else { + // decorator_arg is something derived from a namedexpr_test rule. + // Compile it normally. + compile_node(comp, decorator_arg); } } diff --git a/py/grammar.h b/py/grammar.h index 285fbded2fdae..6c923e0c653ce 100644 --- a/py/grammar.h +++ b/py/grammar.h @@ -51,7 +51,9 @@ DEF_RULE_NC(file_input_3, or(2), tok(NEWLINE), rule(stmt)) DEF_RULE_NC(eval_input, and_ident(2), rule(testlist), opt_rule(eval_input_2)) DEF_RULE_NC(eval_input_2, and(1), tok(NEWLINE)) -// decorator: '@' dotted_name [ '(' [arglist] ')' ] NEWLINE +// decorator: '@' decorator_arg NEWLINE +// decorator_arg: built_in_decorator | namedexpr_test +// built_in_decorator: 'micropython' '.' NAME // decorators: decorator+ // decorated: decorators (classdef | funcdef | async_funcdef) // funcdef: 'def' NAME parameters ['->' test] ':' suite @@ -62,7 +64,9 @@ DEF_RULE_NC(eval_input_2, and(1), tok(NEWLINE)) // varargslist: vfpdef ['=' test] (',' vfpdef ['=' test])* [',' ['*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef]] | '*' [vfpdef] (',' vfpdef ['=' test])* [',' '**' vfpdef] | '**' vfpdef // vfpdef: NAME -DEF_RULE_NC(decorator, and(4), tok(OP_AT), rule(dotted_name), opt_rule(trailer_paren), tok(NEWLINE)) +DEF_RULE_NC(decorator, and(3), tok(OP_AT), rule(decorator_arg), tok(NEWLINE)) +DEF_RULE_NC(decorator_arg, or(2), rule(built_in_decorator), rule(namedexpr_test)) +DEF_RULE_NC(built_in_decorator, and(3), tok(NAME), tok(DEL_PERIOD), tok(NAME)) DEF_RULE_NC(decorators, one_or_more, rule(decorator)) DEF_RULE(decorated, c(decorated), and_ident(2), rule(decorators), rule(decorated_body)) #if MICROPY_PY_ASYNC_AWAIT diff --git a/py/parse.c b/py/parse.c index 233cba11b46ca..4bb617208f8d4 100644 --- a/py/parse.c +++ b/py/parse.c @@ -980,6 +980,11 @@ mp_parse_tree_t mp_parse(mp_lexer_t *lex, mp_parse_input_kind_t input_kind) { mp_token_kind_t tok_kind = rule_arg[i] & RULE_ARG_ARG_MASK; if (lex->tok_kind == tok_kind) { // matched token + if (rule_id == RULE_built_in_decorator && i == 0 && qstr_from_strn(lex->vstr.buf, lex->vstr.len) != MP_QSTR_micropython) { + // The first NAME of a built-in decorator must be "micropython" + backtrack = true; + goto next_rule; + } if (tok_kind == MP_TOKEN_NAME) { push_result_token(&parser, rule_id); } diff --git a/tests/basics/decorator.py b/tests/basics/decorator.py index deb341d709d03..2eb53fc7767ab 100644 --- a/tests/basics/decorator.py +++ b/tests/basics/decorator.py @@ -1,5 +1,71 @@ -# test decorators +### Test decorator syntax. +############################################################################### +### Test for syntax errors. +############################################################################### +def test_for_syntax_error(code): + try: + exec(code) + except SyntaxError: + print("SyntaxError") + +decs = [ + "x,", + "x, y", + "x = y", + "pass", + "import sys", + "@dec" +] + +for dec in decs: + code = "@{}\ndef foo(): pass".format(dec) + test_for_syntax_error(code) + +############################################################################### +### Test for type errors (previously syntax errors before Python 3.9). +############################################################################### +def test_for_type_error(code): + try: + exec(code) + except TypeError: + print("TypeError") + +decs = [ + "[1, 2][-1]", + "(1, 2)", + "True", + "...", + "None" +] + +for dec in decs: + code = "@{}\ndef foo(): pass".format(dec) + test_for_type_error(code) + +############################################################################### +### Test for name errors. Tests added to make sure the compiler does not +### confuse anything as a built-in decorator. +############################################################################### +def test_for_name_error(code): + try: + exec(code) + except NameError: + print("NameError") + +decs = [ + "some", + "some.dotted", + "some.dotted.name" +] + +for dec in decs: + code = "@{}\ndef foo(): pass".format(dec) + test_for_name_error(code) + +############################################################################### +### Test valid decorators. +############################################################################### def dec(f): print('dec') return f diff --git a/tests/basics/decorator.py.exp b/tests/basics/decorator.py.exp new file mode 100644 index 0000000000000..61511ecb82c17 --- /dev/null +++ b/tests/basics/decorator.py.exp @@ -0,0 +1,17 @@ +SyntaxError +SyntaxError +SyntaxError +SyntaxError +SyntaxError +SyntaxError +TypeError +TypeError +TypeError +TypeError +TypeError +NameError +NameError +NameError +dec +dec_arg +dec diff --git a/tests/cmdline/cmd_parsetree.py.exp b/tests/cmdline/cmd_parsetree.py.exp index 6ee96b7caf293..fa0e7a00b16fe 100644 --- a/tests/cmdline/cmd_parsetree.py.exp +++ b/tests/cmdline/cmd_parsetree.py.exp @@ -32,11 +32,11 @@ id(h) [ 13] \(rule\|atom_expr_normal\)(44) (n=2) [ 13] literal const(\.\+) -[ 13] \(rule\|atom_expr_trailers\)(142) (n=2) +[ 13] \(rule\|atom_expr_trailers\)(144) (n=2) [ 13] \(rule\|trailer_period\)(50) (n=1) id(format) [ 13] \(rule\|trailer_paren\)(48) (n=1) -[ 13] \(rule\|arglist\)(164) (n=1) +[ 13] \(rule\|arglist\)(166) (n=1) id(b) ---------------- File cmdline/cmd_parsetree.py, code block '' (descriptor: \.\+, bytecode @\.\+ 62 bytes)