Skip to content

Commit 90e366e

Browse files
committed
parse: Don't allow creation of invalid UTF8 strings or identifiers.
... even when compiling non-UTF-8 files or byte strings. Closes: #17855 Signed-off-by: Jeff Epler <jepler@gmail.com>
1 parent a614243 commit 90e366e

File tree

3 files changed

+10
-1
lines changed

3 files changed

+10
-1
lines changed

py/objstr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2461,7 +2461,7 @@ mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str, mp_obj_iter_buf_t *iter_buf) {
24612461
}
24622462

24632463
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
2464-
static bool mp_utf8_check(const byte *p, size_t len) {
2464+
bool mp_utf8_check(const byte *p, size_t len) {
24652465
uint8_t need = 0;
24662466
const byte *end = p + len;
24672467
for (; p < end; p++) {

py/objstr.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,14 @@ extern const mp_obj_dict_t mp_obj_array_locals_dict;
122122
#if MICROPY_PY_BUILTINS_STR_UNICODE && MICROPY_PY_BUILTINS_STR_UNICODE_CHECK
123123
// Throws an exception if string content is not UTF-8
124124
void mp_utf8_require(const byte *p, size_t len);
125+
// Non-throwing variant: reports the result as a bool instead of raising.
// NOTE(review): presumably returns true iff p[0..len) is valid UTF-8 -- confirm against py/objstr.c.
bool mp_utf8_check(const byte *p, size_t len);
125126
#else
126127
// If unicode strings are not enabled, or the check is explicitly disabled, it's a no-op
127128
// Stub for builds without the UTF-8 check: p and len are ignored and nothing is raised.
static inline void mp_utf8_require(const byte *p, size_t len) {
128129
}
130+
// Stub for builds without the UTF-8 check: p and len are ignored and
// every input is reported as valid, so callers never reject anything.
static inline bool mp_utf8_check(const byte *p, size_t len) {
131+
return true;
132+
}
129133
#endif
130134

131135
#endif // MICROPY_INCLUDED_PY_OBJSTR_H

py/parse.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,11 @@ static mp_parse_node_t make_node_const_object_optimised(parser_t *parser, size_t
598598
static void push_result_token(parser_t *parser, uint8_t rule_id) {
599599
mp_parse_node_t pn;
600600
mp_lexer_t *lex = parser->lexer;
601+
if (lex->tok_kind == MP_TOKEN_NAME || lex->tok_kind == MP_TOKEN_STRING) {
602+
if (!mp_utf8_check((byte *)lex->vstr.buf, lex->vstr.len)) {
603+
mp_raise_msg(&mp_type_SyntaxError, NULL);
604+
}
605+
}
601606
if (lex->tok_kind == MP_TOKEN_NAME) {
602607
qstr id = qstr_from_strn(lex->vstr.buf, lex->vstr.len);
603608
#if MICROPY_COMP_CONST

0 commit comments

Comments
 (0)