Skip to content

Commit af20309

Browse files
author
Charlie Somerville
committed
backport fstrings
1 parent f1e91d4 commit af20309

File tree

6 files changed

+158
-9
lines changed

6 files changed

+158
-9
lines changed

NEWS

+2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ with all sufficient information, see the ChangeLog file.
2121

2222
* No warning for unused variables starting with '_'
2323

24+
* Added 'f' suffix for string literals that returns a frozen String object.
25+
2426
=== Core classes updates (outstanding ones only)
2527

2628
* ARGF

compile.c

+2-1
Original file line numberDiff line numberDiff line change
@@ -2489,7 +2489,8 @@ case_when_optimizable_literal(NODE * node)
24892489
modf(RFLOAT_VALUE(v), &ival) == 0.0) {
24902490
return FIXABLE(ival) ? LONG2FIX((long)ival) : rb_dbl2big(ival);
24912491
}
2492-
if (SYMBOL_P(v) || rb_obj_is_kind_of(v, rb_cNumeric)) {
2492+
if (SYMBOL_P(v) || RB_TYPE_P(v, T_STRING) ||
2493+
rb_obj_is_kind_of(v, rb_cNumeric)) {
24932494
return v;
24942495
}
24952496
break;

ext/ripper/eventids2.c

+3
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ static ID ripper_id_symbeg;
3636
static ID ripper_id_tstring_beg;
3737
static ID ripper_id_tstring_content;
3838
static ID ripper_id_tstring_end;
39+
static ID ripper_id_tstring_suffix;
3940
static ID ripper_id_words_beg;
4041
static ID ripper_id_qwords_beg;
4142
static ID ripper_id_qsymbols_beg;
@@ -91,6 +92,7 @@ ripper_init_eventids2(void)
9192
ripper_id_tstring_beg = rb_intern_const("on_tstring_beg");
9293
ripper_id_tstring_content = rb_intern_const("on_tstring_content");
9394
ripper_id_tstring_end = rb_intern_const("on_tstring_end");
95+
ripper_id_tstring_suffix = rb_intern_const("on_tstring_suffix");
9496
ripper_id_words_beg = rb_intern_const("on_words_beg");
9597
ripper_id_qwords_beg = rb_intern_const("on_qwords_beg");
9698
ripper_id_qsymbols_beg = rb_intern_const("on_qsymbols_beg");
@@ -246,6 +248,7 @@ static const struct token_assoc {
246248
{tSTRING_DEND, &ripper_id_embexpr_end},
247249
{tSTRING_DVAR, &ripper_id_embvar},
248250
{tSTRING_END, &ripper_id_tstring_end},
251+
{tSTRING_SUFFIX, &ripper_id_tstring_suffix},
249252
{tSYMBEG, &ripper_id_symbeg},
250253
{tUMINUS, &ripper_id_op},
251254
{tUMINUS_NUM, &ripper_id_op},

parse.y

+99-8
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,8 @@ static NODE *new_evstr_gen(struct parser_params*,NODE*);
396396
#define new_evstr(n) new_evstr_gen(parser,(n))
397397
static NODE *evstr2dstr_gen(struct parser_params*,NODE*);
398398
#define evstr2dstr(n) evstr2dstr_gen(parser,(n))
399+
static NODE *str_suffix_gen(struct parser_params*, NODE*, long);
400+
#define str_suffix(n,o) str_suffix_gen(parser,(n),(o))
399401
static NODE *splat_array(NODE*);
400402

401403
static NODE *call_bin_op_gen(struct parser_params*,NODE*,ID,NODE*);
@@ -525,6 +527,9 @@ static int lvar_defined_gen(struct parser_params*, ID);
525527
#define RE_OPTION_MASK 0xff
526528
#define RE_OPTION_ARG_ENCODING_NONE 32
527529

530+
#define STR_OPTION_FROZEN 1
531+
#define STR_OPTION_BINARY 0 /* disabled */
532+
528533
#define NODE_STRTERM NODE_ZARRAY /* nothing to gc */
529534
#define NODE_HEREDOC NODE_ARRAY /* 1, 3 to gc */
530535
#define SIGN_EXTEND(x,n) (((1<<(n)-1)^((x)&~(~0<<(n))))-(1<<(n)-1))
@@ -749,7 +754,7 @@ static void token_info_pop(struct parser_params*, const char *token);
749754
%token <id> tIDENTIFIER tFID tGVAR tIVAR tCONSTANT tCVAR tLABEL
750755
%token <node> tINTEGER tFLOAT tSTRING_CONTENT tCHAR
751756
%token <node> tNTH_REF tBACK_REF
752-
%token <num> tREGEXP_END
757+
%token <num> tREGEXP_END tSTRING_SUFFIX
753758

754759
%type <node> singleton strings string string1 xstring regexp
755760
%type <node> string_contents xstring_contents regexp_contents string_content
@@ -775,6 +780,7 @@ static void token_info_pop(struct parser_params*, const char *token);
775780
%type <id> fsym keyword_variable user_variable sym symbol operation operation2 operation3
776781
%type <id> cname fname op f_rest_arg f_block_arg opt_f_block_arg f_norm_arg f_bad_arg
777782
%type <id> f_kwrest
783+
%type <num> opt_string_sfx
778784
/*%%%*/
779785
/*%
780786
%type <val> program reswords then do dot_or_colon
@@ -3811,7 +3817,7 @@ literal : numeric
38113817
| dsym
38123818
;
38133819

3814-
strings : string
3820+
strings : string opt_string_sfx
38153821
{
38163822
/*%%%*/
38173823
NODE *node = $1;
@@ -3821,6 +3827,7 @@ strings : string
38213827
else {
38223828
node = evstr2dstr(node);
38233829
}
3830+
node = str_suffix(node, $2);
38243831
$$ = node;
38253832
/*%
38263833
$$ = $1;
@@ -3850,6 +3857,10 @@ string1 : tSTRING_BEG string_contents tSTRING_END
38503857
}
38513858
;
38523859

3860+
/*
 * Optional option suffix that may follow a string literal.
 * With a tSTRING_SUFFIX token the rule has no explicit action, so yacc's
 * default ($$ = $1) passes the token's value through; with no suffix the
 * value is 0.
 */
opt_string_sfx : tSTRING_SUFFIX
3861+
| /* none */ {$$ = 0;}
3862+
;
3863+
38533864
xstring : tXSTRING_BEG xstring_contents tSTRING_END
38543865
{
38553866
/*%%%*/
@@ -4992,6 +5003,7 @@ none : /* none */
49925003
# define yylval (*((YYSTYPE*)(parser->parser_yylval)))
49935004

49945005
static int parser_regx_options(struct parser_params*);
5006+
static int parser_str_options(struct parser_params*);
49955007
static int parser_tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**);
49965008
static void parser_tokaddmbc(struct parser_params *parser, int c, rb_encoding *enc);
49975009
static int parser_parse_string(struct parser_params*,NODE*);
@@ -5007,6 +5019,7 @@ static int parser_here_document(struct parser_params*,NODE*);
50075019
# define read_escape(flags,e) parser_read_escape(parser, (flags), (e))
50085020
# define tokadd_escape(e) parser_tokadd_escape(parser, (e))
50095021
# define regx_options() parser_regx_options(parser)
5022+
# define str_options() parser_str_options(parser)
50105023
# define tokadd_string(f,t,p,n,e) parser_tokadd_string(parser,(f),(t),(p),(n),(e))
50115024
# define parse_string(n) parser_parse_string(parser,(n))
50125025
# define tokaddmbc(c, enc) parser_tokaddmbc(parser, (c), (enc))
@@ -5501,10 +5514,11 @@ rb_parser_compile_file(volatile VALUE vparser, const char *f, VALUE file, int st
55015514
#define STR_FUNC_QWORDS 0x08
55025515
#define STR_FUNC_SYMBOL 0x10
55035516
#define STR_FUNC_INDENT 0x20
5517+
#define STR_FUNC_OPTION 0x40
55045518

55055519
enum string_type {
5506-
str_squote = (0),
5507-
str_dquote = (STR_FUNC_EXPAND),
5520+
str_squote = (STR_FUNC_OPTION),
5521+
str_dquote = (STR_FUNC_EXPAND|STR_FUNC_OPTION),
55085522
str_xquote = (STR_FUNC_EXPAND),
55095523
str_regexp = (STR_FUNC_REGEXP|STR_FUNC_ESCAPE|STR_FUNC_EXPAND),
55105524
str_sword = (STR_FUNC_QWORDS),
@@ -5945,6 +5959,40 @@ parser_regx_options(struct parser_params *parser)
59455959
return options | RE_OPTION_ENCODING(kcode);
59465960
}
59475961

5962+
/*
 * Reads the option letters that directly follow a string terminator and
 * returns them as a bitmask of STR_OPTION_* flags (0 when no letter is
 * recognized).
 *
 * Characters are taken with nextc() for as long as ISALPHA() holds; the
 * first character that fails the test is handed back with pushback().
 * Letters without a matching case are collected in the token buffer and,
 * if any were collected, reported through compile_error().  Flags that
 * were recognized are still returned in that case.
 */
static int
5963+
parser_str_options(struct parser_params *parser)
5964+
{
5965+
int c, options = 0;
5966+
5967+
/* start an empty token buffer; it only ever receives unknown letters */
newtok();
5968+
while (c = nextc(), ISALPHA(c)) {
5969+
switch (c) {
5970+
#if STR_OPTION_FROZEN
5971+
case 'f':
5972+
options |= STR_OPTION_FROZEN;
5973+
break;
5974+
#endif
5975+
/* this arm is compiled out when STR_OPTION_BINARY is 0, so 'b' then falls to default */
#if STR_OPTION_BINARY
5976+
case 'b':
5977+
options |= STR_OPTION_BINARY;
5978+
break;
5979+
#endif
5980+
default:
5981+
/* unrecognized letter: keep it for the error message below */
tokadd(c);
5982+
break;
5983+
}
5984+
}
5985+
/* c is the character that ended the loop (not a letter); give it back */
pushback(c);
5986+
5987+
if (toklen()) {
5988+
tokfix();
5989+
/* "options" is pluralized when more than one unknown letter was collected */
compile_error(PARSER_ARG "unknown string option%s - %s",
5990+
toklen() > 1 ? "s" : "", tok());
5991+
}
5992+
5993+
return options;
5994+
}
5995+
59485996
static void
59495997
dispose_string(VALUE str)
59505998
{
@@ -6211,6 +6259,10 @@ parser_parse_string(struct parser_params *parser, NODE *quote)
62116259
rb_encoding *enc = current_enc;
62126260

62136261
if (func == -1) return tSTRING_END;
6262+
if (func == 0) {
6263+
set_yylval_num(term);
6264+
return tSTRING_SUFFIX;
6265+
}
62146266
c = nextc();
62156267
if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
62166268
do {c = nextc();} while (ISSPACE(c));
@@ -6219,11 +6271,18 @@ parser_parse_string(struct parser_params *parser, NODE *quote)
62196271
if (c == term && !quote->nd_nest) {
62206272
if (func & STR_FUNC_QWORDS) {
62216273
quote->nd_func = -1;
6274+
quote->u2.id = 0;
62226275
return ' ';
62236276
}
6224-
if (!(func & STR_FUNC_REGEXP)) return tSTRING_END;
6225-
set_yylval_num(regx_options());
6226-
return tREGEXP_END;
6277+
if (func & STR_FUNC_REGEXP) {
6278+
set_yylval_num(regx_options());
6279+
return tREGEXP_END;
6280+
}
6281+
if ((func & STR_FUNC_OPTION) && (func = str_options()) != 0) {
6282+
quote->nd_func = 0;
6283+
quote->u2.id = func;
6284+
}
6285+
return tSTRING_END;
62276286
}
62286287
if (space) {
62296288
pushback(c);
@@ -6852,7 +6911,8 @@ parser_yylex(struct parser_params *parser)
68526911
}
68536912
else {
68546913
token = parse_string(lex_strterm);
6855-
if (token == tSTRING_END || token == tREGEXP_END) {
6914+
if ((token == tSTRING_END && lex_strterm->nd_func) ||
6915+
token == tSTRING_SUFFIX || token == tREGEXP_END) {
68566916
rb_gc_force_recycle((VALUE)lex_strterm);
68576917
lex_strterm = 0;
68586918
lex_state = EXPR_END;
@@ -8383,6 +8443,37 @@ evstr2dstr_gen(struct parser_params *parser, NODE *node)
83838443
return node;
83848444
}
83858445

8446+
/*
 * Applies the string-suffix option bits in `opt` (STR_OPTION_*) to `node`
 * and returns the node to use in its place.
 *
 * - NODE_STR: the literal held in nd_lit is changed in place; for the
 *   frozen option it is frozen with OBJ_FREEZE and the node is retagged
 *   as NODE_LIT.
 * - any other node type: the node is wrapped with NEW_CALL using the
 *   method name "freeze" (or "b") and 0 as the last argument, and the
 *   wrapper is returned.
 *
 * Each option's branches are compiled only when its STR_OPTION_* macro is
 * non-zero.  `parser` is not referenced directly in this body.
 */
static NODE *
8447+
str_suffix_gen(struct parser_params *parser, NODE *node, long opt)
8448+
{
8449+
if (nd_type(node) == NODE_STR) {
8450+
#if STR_OPTION_BINARY
8451+
if (opt & STR_OPTION_BINARY) {
8452+
rb_enc_associate_index(node->nd_lit, ENCINDEX_ASCII);
8453+
}
8454+
#endif
8455+
#if STR_OPTION_FROZEN
8456+
if (opt & STR_OPTION_FROZEN) {
8457+
OBJ_FREEZE(node->nd_lit);
8458+
/* NOTE(review): presumably NODE_LIT makes every evaluation yield this same
 * frozen object instead of a fresh copy -- confirm against compile.c */
nd_set_type(node, NODE_LIT);
8459+
}
8460+
#endif
8461+
}
8462+
else {
8463+
#if STR_OPTION_BINARY
8464+
if (opt & STR_OPTION_BINARY) {
8465+
node = NEW_CALL(node, rb_intern("b"), 0);
8466+
}
8467+
#endif
8468+
#if STR_OPTION_FROZEN
8469+
if (opt & STR_OPTION_FROZEN) {
8470+
/* not a plain NODE_STR: wrap the node in a `freeze` call instead */
node = NEW_CALL(node, rb_intern("freeze"), 0);
8471+
}
8472+
#endif
8473+
}
8474+
return node;
8475+
}
8476+
83868477
static NODE *
83878478
new_evstr_gen(struct parser_params *parser, NODE *node)
83888479
{

test/ripper/test_scanner_events.rb

+9
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,15 @@ def test_tstring_end
566566
scan('tstring_end', '%Q[abcdef]')
567567
end
568568

569+
# Scans 'tstring_end' events for "...", %q[...] and %Q[...] literals that
# carry the 'f' suffix; each expected token is the closing delimiter
# followed by the suffix letter.
def test_tstring_suffix
570+
assert_equal ['"f'],
571+
scan('tstring_end', '"abcdef"f')
572+
assert_equal [']f'],
573+
scan('tstring_end', '%q[abcdef]f')
574+
assert_equal [']f'],
575+
scan('tstring_end', '%Q[abcdef]f')
576+
end
577+
569578
def test_regexp_beg
570579
assert_equal [],
571580
scan('regexp_beg', '')

test/ruby/test_string.rb

+43
Original file line numberDiff line numberDiff line change
@@ -2193,6 +2193,49 @@ def test_byteslice
21932193
assert_equal(false, "\u3042".byteslice(0, 2).valid_encoding?)
21942194
assert_equal(false, ("\u3042"*10).byteslice(0, 20).valid_encoding?)
21952195
end
2196+
2197+
# A letter other than 'f' after the closing quote ('x' here) must make
# eval raise SyntaxError.
def test_unknown_string_option
2198+
assert_raises(SyntaxError) do
2199+
eval(%{
2200+
"hello"x
2201+
})
2202+
end
2203+
end
2204+
2205+
# A plain literal with the 'f' suffix compares equal to the unsuffixed
# string, is frozen, and returns the same object on every evaluation.
def test_frozen_string
2206+
assert_equal "hello", "hello"f
2207+
2208+
assert_predicate "hello"f, :frozen?
2209+
2210+
f = -> { "hello"f }
2211+
2212+
# two calls of the same lambda must return one and the same object
assert_equal f.call.object_id, f.call.object_id
2213+
end
2214+
2215+
# An interpolated literal with the 'f' suffix is frozen, and its
# interpolation is still evaluated on every call.
def test_frozen_dstring
2216+
assert_equal "hello123", "hello#{123}"f
2217+
2218+
assert_predicate "hello#{123}"f, :frozen?
2219+
2220+
i = 0
2221+
f = -> { "#{i += 1}"f }
2222+
# i is incremented on each call, so successive results differ
assert_equal "1", f.call
2223+
assert_equal "2", f.call
2224+
end
2225+
2226+
# A suffixed literal directly followed by another string literal must be a
# SyntaxError.
# NOTE(review): both assertions below read the same in this view; they may
# differ only in whitespace inside the %{...} source -- confirm against the
# original file before treating the second one as a duplicate.
def test_frozen_string_cannot_be_adjacent
2227+
assert_raises(SyntaxError) do
2228+
eval(%{
2229+
"hello"f "world"
2230+
})
2231+
end
2232+
2233+
assert_raises(SyntaxError) do
2234+
eval(%{
2235+
"hello"f "world"
2236+
})
2237+
end
2238+
end
21962239
end
21972240

21982241
class TestString2 < TestString

0 commit comments

Comments
 (0)