From af203093e17ec2faa8cbef9572b2939ea3d1f05e Mon Sep 17 00:00:00 2001 From: Charlie Somerville Date: Thu, 5 Sep 2013 16:37:11 +1000 Subject: [PATCH 1/2] backport fstrings --- NEWS | 2 + compile.c | 3 +- ext/ripper/eventids2.c | 3 + parse.y | 107 ++++++++++++++++++++++++++--- test/ripper/test_scanner_events.rb | 9 +++ test/ruby/test_string.rb | 43 ++++++++++++ 6 files changed, 158 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 5e0c0585a2e106..7edf4aadc92af4 100644 --- a/NEWS +++ b/NEWS @@ -21,6 +21,8 @@ with all sufficient information, see the ChangeLog file. * No warning for unused variables starting with '_' +* Added 'f' suffix for string literals that returns a frozen String object. + === Core classes updates (outstanding ones only) * ARGF diff --git a/compile.c b/compile.c index 50b3d4c29a1f4f..6c0dfff4c10c61 100644 --- a/compile.c +++ b/compile.c @@ -2489,7 +2489,8 @@ case_when_optimizable_literal(NODE * node) modf(RFLOAT_VALUE(v), &ival) == 0.0) { return FIXABLE(ival) ? LONG2FIX((long)ival) : rb_dbl2big(ival); } - if (SYMBOL_P(v) || rb_obj_is_kind_of(v, rb_cNumeric)) { + if (SYMBOL_P(v) || RB_TYPE_P(v, T_STRING) || + rb_obj_is_kind_of(v, rb_cNumeric)) { return v; } break; diff --git a/ext/ripper/eventids2.c b/ext/ripper/eventids2.c index 841a18c583f540..56740adf837801 100644 --- a/ext/ripper/eventids2.c +++ b/ext/ripper/eventids2.c @@ -36,6 +36,7 @@ static ID ripper_id_symbeg; static ID ripper_id_tstring_beg; static ID ripper_id_tstring_content; static ID ripper_id_tstring_end; +static ID ripper_id_tstring_suffix; static ID ripper_id_words_beg; static ID ripper_id_qwords_beg; static ID ripper_id_qsymbols_beg; @@ -91,6 +92,7 @@ ripper_init_eventids2(void) ripper_id_tstring_beg = rb_intern_const("on_tstring_beg"); ripper_id_tstring_content = rb_intern_const("on_tstring_content"); ripper_id_tstring_end = rb_intern_const("on_tstring_end"); + ripper_id_tstring_suffix = rb_intern_const("on_tstring_suffix"); ripper_id_words_beg = rb_intern_const("on_words_beg"); ripper_id_qwords_beg = rb_intern_const("on_qwords_beg"); ripper_id_qsymbols_beg = rb_intern_const("on_qsymbols_beg"); @@ -246,6 +248,7 @@ static const struct token_assoc { {tSTRING_DEND, &ripper_id_embexpr_end}, {tSTRING_DVAR, &ripper_id_embvar}, {tSTRING_END, &ripper_id_tstring_end}, + {tSTRING_SUFFIX, &ripper_id_tstring_suffix}, {tSYMBEG, &ripper_id_symbeg}, {tUMINUS, &ripper_id_op}, {tUMINUS_NUM, &ripper_id_op}, diff --git a/parse.y b/parse.y index 1bdba058e18355..704de64ad29134 100644 --- a/parse.y +++ b/parse.y @@ -396,6 +396,8 @@ static NODE *new_evstr_gen(struct parser_params*,NODE*); #define new_evstr(n) new_evstr_gen(parser,(n)) static NODE *evstr2dstr_gen(struct parser_params*,NODE*); #define evstr2dstr(n) evstr2dstr_gen(parser,(n)) +static NODE *str_suffix_gen(struct parser_params*, NODE*, long); +#define str_suffix(n,o) str_suffix_gen(parser,(n),(o)) static NODE *splat_array(NODE*); static NODE *call_bin_op_gen(struct parser_params*,NODE*,ID,NODE*); @@ -525,6 +527,9 @@ static int lvar_defined_gen(struct parser_params*, ID); #define RE_OPTION_MASK 0xff #define RE_OPTION_ARG_ENCODING_NONE 32 +#define STR_OPTION_FROZEN 1 +#define STR_OPTION_BINARY 0 /* disabled */ + #define NODE_STRTERM NODE_ZARRAY /* nothing to gc */ #define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */ #define SIGN_EXTEND(x,n) (((1<<(n)-1)^((x)&~(~0<<(n))))-(1<<(n)-1)) @@ -749,7 +754,7 @@ static void token_info_pop(struct parser_params*, const char *token); %token tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL %token tINTEGER tFLOAT tSTRING_CONTENT tCHAR %token tNTH_REF tBACK_REF -%token tREGEXP_END +%token tREGEXP_END tSTRING_SUFFIX %type singleton strings string string1 xstring regexp %type string_contents xstring_contents regexp_contents string_content @@ -775,6 +780,7 @@ static void token_info_pop(struct parser_params*, const char *token); %type fsym keyword_variable user_variable sym symbol operation operation2 operation3 %type cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg %type f_kwrest +%type opt_string_sfx /*%%%*/ /*% %type program reswords then do dot_or_colon @@ -3811,7 +3817,7 @@ literal : numeric | dsym ; -strings : string +strings : string opt_string_sfx { /*%%%*/ NODE *node = $1; @@ -3821,6 +3827,7 @@ strings : string else { node = evstr2dstr(node); } + node = str_suffix(node, $2); $$ = node; /*% $$ = $1; @@ -3850,6 +3857,10 @@ string1 : tSTRING_BEG string_contents tSTRING_END } ; +opt_string_sfx : tSTRING_SUFFIX + | /* none */ {$$ = 0;} + ; + xstring : tXSTRING_BEG xstring_contents tSTRING_END { /*%%%*/ @@ -4992,6 +5003,7 @@ none : /* none */ # define yylval (*((YYSTYPE*)(parser->parser_yylval))) static int parser_regx_options(struct parser_params*); +static int parser_str_options(struct parser_params*); static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**); static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc); static int parser_parse_string(struct parser_params*,NODE*); @@ -5007,6 +5019,7 @@ static int parser_here_document(struct parser_params*,NODE*); # define read_escape(flags,e) parser_read_escape(parser, (flags), (e)) # define tokadd_escape(e) parser_tokadd_escape(parser, (e)) # define regx_options() parser_regx_options(parser) +# define str_options() parser_str_options(parser) # define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,(f),(t),(p),(n),(e)) # define parse_string(n) parser_parse_string(parser,(n)) # define tokaddmbc(c, enc) parser_tokaddmbc(parser, (c), (enc)) @@ -5501,10 +5514,11 @@ rb_parser_compile_file(volatile VALUE vparser, const char *f, VALUE file, int st #define STR_FUNC_QWORDS 0x08 #define STR_FUNC_SYMBOL 0x10 #define STR_FUNC_INDENT 0x20 +#define STR_FUNC_OPTION 0x40 enum string_type { - str_squote = (0), - str_dquote = (STR_FUNC_EXPAND), + str_squote = (STR_FUNC_OPTION), + str_dquote = (STR_FUNC_EXPAND|STR_FUNC_OPTION), str_xquote = (STR_FUNC_EXPAND), str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND), str_sword = (STR_FUNC_QWORDS), @@ -5945,6 +5959,40 @@ parser_regx_options(struct parser_params *parser) return options | RE_OPTION_ENCODING(kcode); } +static int +parser_str_options(struct parser_params *parser) +{ + int c, options = 0; + + newtok(); + while (c = nextc(), ISALPHA(c)) { + switch (c) { +#if STR_OPTION_FROZEN + case 'f': + options |= STR_OPTION_FROZEN; + break; +#endif +#if STR_OPTION_BINARY + case 'b': + options |= STR_OPTION_BINARY; + break; +#endif + default: + tokadd(c); + break; + } + } + pushback(c); + + if (toklen()) { + tokfix(); + compile_error(PARSER_ARG "unknown string option%s - %s", + toklen() > 1 ? "s" : "", tok()); + } + + return options; +} + static void dispose_string(VALUE str) { @@ -6211,6 +6259,10 @@ parser_parse_string(struct parser_params *parser, NODE *quote) rb_encoding *enc = current_enc; if (func == -1) return tSTRING_END; + if (func == 0) { + set_yylval_num(term); + return tSTRING_SUFFIX; + } c = nextc(); if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) { do {c = nextc();} while (ISSPACE(c)); @@ -6219,11 +6271,18 @@ parser_parse_string(struct parser_params *parser, NODE *quote) if (c == term && !quote->nd_nest) { if (func & STR_FUNC_QWORDS) { quote->nd_func = -1; + quote->u2.id = 0; return ' '; } - if (!(func & STR_FUNC_REGEXP)) return tSTRING_END; - set_yylval_num(regx_options()); - return tREGEXP_END; + if (func & STR_FUNC_REGEXP) { + set_yylval_num(regx_options()); + return tREGEXP_END; + } + if ((func & STR_FUNC_OPTION) && (func = str_options()) != 0) { + quote->nd_func = 0; + quote->u2.id = func; + } + return tSTRING_END; } if (space) { pushback(c); @@ -6852,7 +6911,8 @@ parser_yylex(struct parser_params *parser) } else { token = parse_string(lex_strterm); - if (token == tSTRING_END || token == tREGEXP_END) { + if ((token == tSTRING_END && lex_strterm->nd_func) || + token == tSTRING_SUFFIX || token == tREGEXP_END) { rb_gc_force_recycle((VALUE)lex_strterm); lex_strterm = 0; lex_state = EXPR_END; @@ -8383,6 +8443,37 @@ evstr2dstr_gen(struct parser_params *parser, NODE *node) return node; } +static NODE * +str_suffix_gen(struct parser_params *parser, NODE *node, long opt) +{ + if (nd_type(node) == NODE_STR) { +#if STR_OPTION_BINARY + if (opt & STR_OPTION_BINARY) { + rb_enc_associate_index(node->nd_lit, ENCINDEX_ASCII); + } +#endif +#if STR_OPTION_FROZEN + if (opt & STR_OPTION_FROZEN) { + OBJ_FREEZE(node->nd_lit); + nd_set_type(node, NODE_LIT); + } +#endif + } + else { +#if STR_OPTION_BINARY + if (opt & STR_OPTION_BINARY) { + node = NEW_CALL(node, rb_intern("b"), 0); + } +#endif +#if STR_OPTION_FROZEN + if (opt & STR_OPTION_FROZEN) { + node = NEW_CALL(node, rb_intern("freeze"), 0); + } +#endif + } + return node; +} + static NODE * new_evstr_gen(struct parser_params *parser, NODE *node) { diff --git a/test/ripper/test_scanner_events.rb b/test/ripper/test_scanner_events.rb index a96edef0633fd0..5568aea8ea454c 100644 --- a/test/ripper/test_scanner_events.rb +++ b/test/ripper/test_scanner_events.rb @@ -566,6 +566,15 @@ def test_tstring_end scan('tstring_end', '%Q[abcdef]') end + def test_tstring_suffix + assert_equal ['"f'], + scan('tstring_end', '"abcdef"f') + assert_equal [']f'], + scan('tstring_end', '%q[abcdef]f') + assert_equal [']f'], + scan('tstring_end', '%Q[abcdef]f') + end + def test_regexp_beg assert_equal [], scan('regexp_beg', '') diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb index 3a60213c2e50e5..1586b73a13508b 100644 --- a/test/ruby/test_string.rb +++ b/test/ruby/test_string.rb @@ -2193,6 +2193,49 @@ def test_byteslice assert_equal(false, "\u3042".byteslice(0, 2).valid_encoding?) assert_equal(false, ("\u3042"*10).byteslice(0, 20).valid_encoding?) end + + def test_unknown_string_option + assert_raises(SyntaxError) do + eval(%{ + "hello"x + }) + end + end + + def test_frozen_string + assert_equal "hello", "hello"f + + assert_predicate "hello"f, :frozen? + + f = -> { "hello"f } + + assert_equal f.call.object_id, f.call.object_id + end + + def test_frozen_dstring + assert_equal "hello123", "hello#{123}"f + + assert_predicate "hello#{123}"f, :frozen? + + i = 0 + f = -> { "#{i += 1}"f } + assert_equal "1", f.call + assert_equal "2", f.call + end + + def test_frozen_string_cannot_be_adjacent + assert_raises(SyntaxError) do + eval(%{ + "hello"f "world" + }) + end + + assert_raises(SyntaxError) do + eval(%{ + "hello"f "world" + }) + end + end end class TestString2 < TestString From e8adba64a8747d307c14b3ac0879c800d3b6c6ce Mon Sep 17 00:00:00 2001 From: Charlie Somerville Date: Thu, 5 Sep 2013 16:50:43 +1000 Subject: [PATCH 2/2] fix syntax errors --- test/rake/test_rake_rules.rb | 6 +++--- test/rss/rss-assertions.rb | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/rake/test_rake_rules.rb b/test/rake/test_rake_rules.rb index 3f6e35232226e6..664aa983e57f88 100644 --- a/test/rake/test_rake_rules.rb +++ b/test/rake/test_rake_rules.rb @@ -298,9 +298,9 @@ def test_recursive_rules_will_work_as_long_as_they_terminate actions = [] create_file("abc.xml") rule '.y' => '.xml' do actions << 'y' end - rule '.c' => '.y' do actions << 'c'end - rule '.o' => '.c' do actions << 'o'end - rule '.exe' => '.o' do actions << 'exe'end + rule '.c' => '.y' do actions << 'c' end + rule '.o' => '.c' do actions << 'o' end + rule '.exe' => '.o' do actions << 'exe' end Task["abc.exe"].invoke assert_equal ['y', 'c', 'o', 'exe'], actions end diff --git a/test/rss/rss-assertions.rb b/test/rss/rss-assertions.rb index 090980a650521f..5763a55eb17131 100644 --- a/test/rss/rss-assertions.rb +++ b/test/rss/rss-assertions.rb @@ -332,7 +332,7 @@ def assert_atom_text_construct(tag_name, generator) _wrap_assertion do [nil, "text", "html"].each do |type| attr = "" - attr = " type=\"#{type}\""if type + attr = " type=\"#{type}\"" if type assert_parse(generator.call(<<-EOA), :nothing_raised) <#{tag_name}#{attr}/> EOA