Skip to content

Commit e2ec9e2

Browse files
committed
Respect the encoding of the source [Bug #18827]
Do not override the input string encoding at the time of preparation, because the source encoding has not yet been determined from the input at that point.
1 parent c310691 commit e2ec9e2

File tree

3 files changed

+38
-10
lines changed

3 files changed

+38
-10
lines changed

parse.y

+16-10
Original file line number | Diff line number | Diff line change
@@ -6462,12 +6462,6 @@ lex_getline(struct parser_params *p)
64626462
if (NIL_P(line)) return line;
64636463
must_be_ascii_compatible(line);
64646464
if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen
6465-
#ifndef RIPPER
6466-
if (p->debug_lines) {
6467-
rb_enc_associate(line, p->enc);
6468-
rb_ary_push(p->debug_lines, line);
6469-
}
6470-
#endif
64716465
p->line_count++;
64726466
return line;
64736467
}
@@ -6614,7 +6608,7 @@ add_delayed_token(struct parser_params *p, const char *tok, const char *end)
66146608
#endif
66156609

66166610
static int
6617-
nextline(struct parser_params *p)
6611+
nextline(struct parser_params *p, int set_encoding)
66186612
{
66196613
VALUE v = p->lex.nextline;
66206614
p->lex.nextline = 0;
@@ -6632,6 +6626,12 @@ nextline(struct parser_params *p)
66326626
lex_goto_eol(p);
66336627
return -1;
66346628
}
6629+
#ifndef RIPPER
6630+
if (p->debug_lines) {
6631+
if (set_encoding) rb_enc_associate(v, p->enc);
6632+
rb_ary_push(p->debug_lines, v);
6633+
}
6634+
#endif
66356635
p->cr_seen = FALSE;
66366636
}
66376637
else if (NIL_P(v)) {
@@ -6663,12 +6663,12 @@ parser_cr(struct parser_params *p, int c)
66636663
}
66646664

66656665
static inline int
6666-
nextc(struct parser_params *p)
6666+
nextc0(struct parser_params *p, int set_encoding)
66676667
{
66686668
int c;
66696669

66706670
if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
6671-
if (nextline(p)) return -1;
6671+
if (nextline(p, set_encoding)) return -1;
66726672
}
66736673
c = (unsigned char)*p->lex.pcur++;
66746674
if (UNLIKELY(c == '\r')) {
@@ -6677,6 +6677,7 @@ nextc(struct parser_params *p)
66776677

66786678
return c;
66796679
}
6680+
#define nextc(p) nextc0(p, TRUE)
66806681

66816682
static void
66826683
pushback(struct parser_params *p, int c)
@@ -8467,7 +8468,7 @@ set_file_encoding(struct parser_params *p, const char *str, const char *send)
84678468
static void
84688469
parser_prepare(struct parser_params *p)
84698470
{
8470-
int c = nextc(p);
8471+
int c = nextc0(p, FALSE);
84718472
p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose);
84728473
switch (c) {
84738474
case '#':
@@ -8479,6 +8480,11 @@ parser_prepare(struct parser_params *p)
84798480
(unsigned char)p->lex.pcur[1] == 0xbf) {
84808481
p->enc = rb_utf8_encoding();
84818482
p->lex.pcur += 2;
8483+
#ifndef RIPPER
8484+
if (p->debug_lines) {
8485+
rb_enc_associate(p->lex.lastline, p->enc);
8486+
}
8487+
#endif
84828488
p->lex.pbeg = p->lex.pcur;
84838489
return;
84848490
}

test/ruby/test_ast.rb

+13
Original file line number | Diff line number | Diff line change
@@ -542,6 +542,19 @@ def test_keep_script_lines_for_of
542542
assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first)
543543
end
544544

545+
def test_encoding_with_keep_script_lines
546+
enc = Encoding::EUC_JP
547+
code = "__ENCODING__".encode(enc)
548+
549+
assert_equal(enc, eval(code))
550+
551+
node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: false)
552+
assert_equal(enc, node.children[2].children[0])
553+
554+
node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: true)
555+
assert_equal(enc, node.children[2].children[0])
556+
end
557+
545558
def test_e_option
546559
assert_in_out_err(["-e", "def foo; end; pp RubyVM::AbstractSyntaxTree.of(method(:foo)).type"],
547560
"", [":SCOPE"], [])

test/ruby/test_syntax.rb

+9
Original file line number | Diff line number | Diff line change
@@ -66,6 +66,15 @@ def test_script_lines
6666
f&.close!
6767
end
6868

69+
def test_script_lines_encoding
70+
require 'tmpdir'
71+
Dir.mktmpdir do |dir|
72+
File.write(File.join(dir, "script_lines.rb"), "SCRIPT_LINES__ = {}\n")
73+
assert_in_out_err(%w"-r./script_lines -w -Ke", "puts __ENCODING__.name",
74+
%w"EUC-JP", /-K is specified/, chdir: dir)
75+
end
76+
end
77+
6978
def test_anonymous_block_forwarding
7079
assert_syntax_error("def b; c(&); end", /no anonymous block parameter/)
7180
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")

0 commit comments

Comments
 (0)