Skip to content

Commit 4c50d23

Browse files
authored
Raise SyntaxError on invalid encoding symbol (ruby#10967)
[Bug #20280] Backport of ruby#10014.
1 parent 40251ed commit 4c50d23

File tree

7 files changed

+49
-21
lines changed

7 files changed

+49
-21
lines changed

parse.y

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12846,17 +12846,31 @@ new_defined(struct parser_params *p, NODE *expr, const YYLTYPE *loc)
1284612846
return NEW_DEFINED(n, loc);
1284712847
}
1284812848

/*
 * str_to_sym_check: validate a string literal that is about to become a
 * symbol.
 *
 * If the coderange of `lit` is ENC_CODERANGE_BROKEN, an "invalid symbol"
 * error is reported at `loc` through yyerror1() and `lit` is replaced by
 * the value of STR_NEW0(); otherwise `lit` is returned unchanged.
 *
 * p    - parser state. It is not named directly in the body, but when built
 *        through universal_parser.c the rb_enc_str_coderange and
 *        ENC_CODERANGE_BROKEN macros expand to members of p->config.
 * lit  - the string value to check.
 * loc  - source location passed to yyerror1() for the diagnostic.
 *
 * Returns the string that the caller should intern.
 */
12849+
static VALUE
12850+
str_to_sym_check(struct parser_params *p, VALUE lit, const YYLTYPE *loc)
12851+
{
12852+
if (rb_enc_str_coderange(lit) == ENC_CODERANGE_BROKEN) {
12853+
yyerror1(loc, "invalid symbol");
12854+
/* Hand back a replacement string (presumably empty -- confirm against
 * the STR_NEW0 definition) so the callers' rb_str_intern() /
 * rb_intern_str() calls never see the broken bytes. */
lit = STR_NEW0();
12855+
}
12856+
12857+
return lit;
12858+
}
12859+
1284912860
static NODE*
1285012861
symbol_append(struct parser_params *p, NODE *symbols, NODE *symbol)
1285112862
{
12863+
VALUE lit;
12864+
1285212865
enum node_type type = nd_type(symbol);
1285312866
switch (type) {
1285412867
case NODE_DSTR:
1285512868
nd_set_type(symbol, NODE_DSYM);
1285612869
break;
1285712870
case NODE_STR:
1285812871
nd_set_type(symbol, NODE_LIT);
12859-
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(RNODE_LIT(symbol)->nd_lit));
12872+
lit = str_to_sym_check(p, RNODE_LIT(symbol)->nd_lit, &RNODE(symbol)->nd_loc);
12873+
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(symbol)->nd_lit = rb_str_intern(lit));
1286012874
break;
1286112875
default:
1286212876
compile_error(p, "unexpected node as symbol: %s", parser_node_name(type));
@@ -14553,7 +14567,7 @@ dsym_node(struct parser_params *p, NODE *node, const YYLTYPE *loc)
1455314567
nd_set_loc(node, loc);
1455414568
break;
1455514569
case NODE_STR:
14556-
lit = RNODE_STR(node)->nd_lit;
14570+
lit = str_to_sym_check(p, RNODE_STR(node)->nd_lit, &RNODE(node)->nd_loc);
1455714571
RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_STR(node)->nd_lit = ID2SYM(rb_intern_str(lit)));
1455814572
nd_set_type(node, NODE_LIT);
1455914573
nd_set_loc(node, loc);

ruby_parser.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -679,12 +679,14 @@ rb_parser_config_initialize(rb_parser_config_t *config)
679679
config->enc_isspace = enc_isspace;
680680
config->enc_coderange_7bit = ENC_CODERANGE_7BIT;
681681
config->enc_coderange_unknown = ENC_CODERANGE_UNKNOWN;
682+
config->enc_coderange_broken = ENC_CODERANGE_BROKEN;
682683
config->enc_compatible = enc_compatible;
683684
config->enc_from_encoding = enc_from_encoding;
684685
config->encoding_get = encoding_get;
685686
config->encoding_set = encoding_set;
686687
config->encoding_is_ascii8bit = encoding_is_ascii8bit;
687688
config->usascii_encoding = usascii_encoding;
689+
config->enc_str_coderange = rb_enc_str_coderange;
688690

689691
config->ractor_make_shareable = rb_ractor_make_shareable;
690692

rubyparser.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,12 +1295,14 @@ typedef struct rb_parser_config_struct {
12951295
int (*enc_isspace)(OnigCodePoint c, rb_encoding *enc);
12961296
int enc_coderange_7bit;
12971297
int enc_coderange_unknown;
1298+
int enc_coderange_broken;
12981299
rb_encoding *(*enc_compatible)(VALUE str1, VALUE str2);
12991300
VALUE (*enc_from_encoding)(rb_encoding *enc);
13001301
int (*encoding_get)(VALUE obj);
13011302
void (*encoding_set)(VALUE obj, int encindex);
13021303
int (*encoding_is_ascii8bit)(VALUE obj);
13031304
rb_encoding *(*usascii_encoding)(void);
1305+
int (*enc_str_coderange)(VALUE str);
13041306

13051307
/* Ractor */
13061308
VALUE (*ractor_make_shareable)(VALUE obj);

spec/ruby/language/hash_spec.rb

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -191,20 +191,22 @@ def h.to_hash; {:b => 2, :c => 3}; end
191191
usascii_hash.keys.first.encoding.should == Encoding::US_ASCII
192192
end
193193

194-
it "raises an EncodingError at parse time when Symbol key with invalid bytes" do
195-
ScratchPad.record []
196-
-> {
197-
eval 'ScratchPad << 1; {:"\xC3" => 1}'
198-
}.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
199-
ScratchPad.recorded.should == []
200-
end
194+
ruby_bug "#20280", ""..."3.3" do
195+
it "raises a SyntaxError at parse time when Symbol key with invalid bytes" do
196+
ScratchPad.record []
197+
-> {
198+
eval 'ScratchPad << 1; {:"\xC3" => 1}'
199+
}.should raise_error(SyntaxError, /invalid symbol/)
200+
ScratchPad.recorded.should == []
201+
end
201202

202-
it "raises an EncodingError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do
203-
ScratchPad.record []
204-
-> {
205-
eval 'ScratchPad << 1; {"\xC3": 1}'
206-
}.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
207-
ScratchPad.recorded.should == []
203+
it "raises a SyntaxError at parse time when Symbol key with invalid bytes and 'key: value' syntax used" do
204+
ScratchPad.record []
205+
-> {
206+
eval 'ScratchPad << 1; {"\xC3": 1}'
207+
}.should raise_error(SyntaxError, /invalid symbol/)
208+
ScratchPad.recorded.should == []
209+
end
208210
end
209211
end
210212

spec/ruby/language/symbol_spec.rb

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,13 @@
9696
%I{a b #{"c"}}.should == [:a, :b, :c]
9797
end
9898

99-
it "raises an EncodingError at parse time when Symbol with invalid bytes" do
100-
ScratchPad.record []
101-
-> {
102-
eval 'ScratchPad << 1; :"\xC3"'
103-
}.should raise_error(EncodingError, 'invalid symbol in encoding UTF-8 :"\xC3"')
104-
ScratchPad.recorded.should == []
99+
ruby_bug "#20280", ""..."3.3" do
100+
it "raises a SyntaxError at parse time when Symbol with invalid bytes" do
101+
ScratchPad.record []
102+
-> {
103+
eval 'ScratchPad << 1; :"\xC3"'
104+
}.should raise_error(SyntaxError, /invalid symbol/)
105+
ScratchPad.recorded.should == []
106+
end
105107
end
106108
end

test/ruby/test_syntax.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,10 @@ def test_do_block_in_hash_brace
13541354
assert_valid_syntax 'p :foo, {proc do end => proc do end, b: proc do end}', bug13073
13551355
end
13561356

# Checks that a hash literal using the "key: value" form with a quoted key
# containing the \xC3 escape is reported as a syntax error whose message
# matches "invalid symbol".
1357+
def test_invalid_encoding_symbol
1358+
assert_syntax_error('{"\xC3": 1}', "invalid symbol")
1359+
end
1360+
13571361
def test_do_after_local_variable
13581362
obj = Object.new
13591363
def obj.m; yield; end

universal_parser.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,12 +283,14 @@ struct rb_imemo_tmpbuf_struct {
283283
#define rb_enc_isspace p->config->enc_isspace
284284
#define ENC_CODERANGE_7BIT p->config->enc_coderange_7bit
285285
#define ENC_CODERANGE_UNKNOWN p->config->enc_coderange_unknown
286+
#define ENC_CODERANGE_BROKEN p->config->enc_coderange_broken
286287
#define rb_enc_compatible p->config->enc_compatible
287288
#define rb_enc_from_encoding p->config->enc_from_encoding
288289
#define ENCODING_GET p->config->encoding_get
289290
#define ENCODING_SET p->config->encoding_set
290291
#define ENCODING_IS_ASCII8BIT p->config->encoding_is_ascii8bit
291292
#define rb_usascii_encoding p->config->usascii_encoding
293+
#define rb_enc_str_coderange p->config->enc_str_coderange
292294

293295
#define rb_ractor_make_shareable p->config->ractor_make_shareable
294296

0 commit comments

Comments
 (0)