Skip to content

Commit 620ba74

Browse files
committed
Manage AST NODEs out of GC
NODEs in AST are no longer objects managed by GC. This change will remove the restriction imposed by the GC. For example, a NODE can use more than five words (this is my primary purpose; we want to store the position data for each NODE, for coverage library), or even a NODE can have variable length (some kinds of NODEs have unused fields). To do this, however, we need more work, since Ripper still uses T_NODE objects managed by the GC. The life time of NODEs is more obvious than other kinds of objects; they are created at parsing, and they become disused immediately after compilation. This change releases all NODEs by a few `xfree`s after compilation, so performance will be improved a bit. In extreme example, `eval("x=1;" * 10000000)` runs much faster (40 sec. -> 7.8 sec. on my machine). The most important part of this change is `ast_t` struct, which has three contents: (1) NODE buffer (malloc'ed memory), (2) a reference to the root NODE, and (3) an array that contains objects that must be marked during parsing (such as literal objects). Some functions that had received `NODE*` arguments, must now receive `ast_t*`. * node.c, node.h: defines `ast_t` struct and related operations. * gc.c, internal.h: defines `imemo_ast`. * parse.y: makes `parser_params` struct have a reference to `ast_t`. Instead of `rb_node_newnode`, use `rb_ast_newnode` to create a NODE. * iseq.c, load.c, ruby.c, template/prelude.c.tmpl: modifies some functions to handle `ast_t*` instead of `NODE*`. * test/ruby/test_gc.rb: ad-hoc fix for a failed test. The test assumes GC eden is increased at startup by NODE object creation. However, this change now create no NODE object, so GC eden is not necessarily increased. git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@60485 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
1 parent 0b19ac6 commit 620ba74

File tree

10 files changed

+250
-68
lines changed

10 files changed

+250
-68
lines changed

gc.c

+7
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,7 @@ typedef struct RVALUE {
434434
const rb_iseq_t iseq;
435435
rb_env_t env;
436436
struct rb_imemo_alloc_struct alloc;
437+
ast_t ast;
437438
} imemo;
438439
struct {
439440
struct RBasic basic;
@@ -2359,6 +2360,9 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
23592360
case imemo_alloc:
23602361
xfree(RANY(obj)->as.imemo.alloc.ptr);
23612362
break;
2363+
case imemo_ast:
2364+
rb_ast_free(&RANY(obj)->as.imemo.ast);
2365+
break;
23622366
default:
23632367
break;
23642368
}
@@ -4540,6 +4544,9 @@ gc_mark_imemo(rb_objspace_t *objspace, VALUE obj)
45404544
} while ((m = m->next) != NULL);
45414545
}
45424546
return;
4547+
case imemo_ast:
4548+
rb_ast_mark(&RANY(obj)->as.imemo.ast);
4549+
return;
45434550
#if VM_CHECK_MODE > 0
45444551
default:
45454552
VM_UNREACHABLE(gc_mark_imemo);

internal.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,8 @@ enum imemo_type {
844844
imemo_memo = 5,
845845
imemo_ment = 6,
846846
imemo_iseq = 7,
847-
imemo_alloc = 8
847+
imemo_alloc = 8,
848+
imemo_ast = 9
848849
};
849850
#define IMEMO_MASK 0x0f
850851

iseq.c

+21-14
Original file line numberDiff line numberDiff line change
@@ -641,9 +641,9 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, c
641641
#else
642642
# define INITIALIZED /* volatile */
643643
#endif
644-
NODE *(*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
644+
ast_t *(*parse)(VALUE vparser, VALUE fname, VALUE file, int start);
645645
int ln;
646-
NODE *INITIALIZED node;
646+
ast_t *INITIALIZED ast;
647647

648648
/* safe results first */
649649
make_compile_option(&option, opt);
@@ -659,18 +659,20 @@ rb_iseq_compile_with_option(VALUE src, VALUE file, VALUE realpath, VALUE line, c
659659
{
660660
const VALUE parser = rb_parser_new();
661661
rb_parser_set_context(parser, base_block, FALSE);
662-
node = (*parse)(parser, file, src, ln);
662+
ast = (*parse)(parser, file, src, ln);
663663
}
664664

665-
if (!node) {
665+
if (!ast->root) {
666+
rb_ast_dispose(ast);
666667
rb_exc_raise(th->ec->errinfo);
667668
}
668669
else {
669670
INITIALIZED VALUE label = parent ?
670671
parent->body->location.label :
671672
rb_fstring_cstr("<compiled>");
672-
iseq = rb_iseq_new_with_opt(node, label, file, realpath, line,
673+
iseq = rb_iseq_new_with_opt(ast->root, label, file, realpath, line,
673674
parent, type, &option);
675+
rb_ast_dispose(ast);
674676
}
675677

676678
return iseq;
@@ -851,8 +853,8 @@ static VALUE
851853
iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
852854
{
853855
VALUE file, line = INT2FIX(1), opt = Qnil;
854-
VALUE parser, f, exc = Qnil;
855-
const NODE *node;
856+
VALUE parser, f, exc = Qnil, ret;
857+
ast_t *ast;
856858
rb_compile_option_t option;
857859
int i;
858860

@@ -869,18 +871,23 @@ iseqw_s_compile_file(int argc, VALUE *argv, VALUE self)
869871

870872
parser = rb_parser_new();
871873
rb_parser_set_context(parser, NULL, FALSE);
872-
node = rb_parser_compile_file_path(parser, file, f, NUM2INT(line));
873-
if (!node) exc = GET_EC()->errinfo;
874+
ast = rb_parser_compile_file_path(parser, file, f, NUM2INT(line));
875+
if (!ast->root) exc = GET_EC()->errinfo;
874876

875877
rb_io_close(f);
876-
if (!node) rb_exc_raise(exc);
878+
if (!ast->root) {
879+
rb_ast_dispose(ast);
880+
rb_exc_raise(exc);
881+
}
877882

878883
make_compile_option(&option, opt);
879884

880-
return iseqw_new(rb_iseq_new_with_opt(node, rb_fstring_cstr("<main>"),
881-
file,
882-
rb_realpath_internal(Qnil, file, 1),
883-
line, NULL, ISEQ_TYPE_TOP, &option));
885+
ret = iseqw_new(rb_iseq_new_with_opt(ast->root, rb_fstring_cstr("<main>"),
886+
file,
887+
rb_realpath_internal(Qnil, file, 1),
888+
line, NULL, ISEQ_TYPE_TOP, &option));
889+
rb_ast_dispose(ast);
890+
return ret;
884891
}
885892

886893
/*

load.c

+4-3
Original file line numberDiff line numberDiff line change
@@ -602,7 +602,7 @@ rb_load_internal0(rb_thread_t *th, VALUE fname, int wrap)
602602
EC_PUSH_TAG(th->ec);
603603
state = EXEC_TAG();
604604
if (state == TAG_NONE) {
605-
NODE *node;
605+
ast_t *ast;
606606
const rb_iseq_t *iseq;
607607

608608
if ((iseq = rb_iseq_load_iseq(fname)) != NULL) {
@@ -611,9 +611,10 @@ rb_load_internal0(rb_thread_t *th, VALUE fname, int wrap)
611611
else {
612612
VALUE parser = rb_parser_new();
613613
rb_parser_set_context(parser, NULL, FALSE);
614-
node = (NODE *)rb_parser_load_file(parser, fname);
615-
iseq = rb_iseq_new_top(node, rb_fstring_cstr("<top (required)>"),
614+
ast = (ast_t *)rb_parser_load_file(parser, fname);
615+
iseq = rb_iseq_new_top(ast->root, rb_fstring_cstr("<top (required)>"),
616616
fname, rb_realpath_internal(Qnil, fname, 1), NULL);
617+
rb_ast_dispose(ast);
617618
}
618619
rb_iseq_eval(iseq);
619620
}

node.c

+104
Original file line numberDiff line numberDiff line change
@@ -1211,3 +1211,107 @@ rb_gc_mark_node(NODE *obj)
12111211
}
12121212
return 0;
12131213
}
1214+
1215+
typedef struct node_buffer_elem_struct {
1216+
struct node_buffer_elem_struct *next;
1217+
NODE buf[1];
1218+
} node_buffer_elem_t;
1219+
1220+
typedef struct node_buffer_struct {
1221+
long idx, len;
1222+
node_buffer_elem_t *head;
1223+
node_buffer_elem_t body;
1224+
} node_buffer_t;
1225+
1226+
node_buffer_t *
1227+
rb_node_buffer_new()
1228+
{
1229+
node_buffer_t *nb = xmalloc(sizeof(node_buffer_t) + 16 * sizeof(NODE));
1230+
nb->idx = 0;
1231+
nb->len = 16;
1232+
nb->head = &nb->body;
1233+
nb->head->next = NULL;
1234+
return nb;
1235+
}
1236+
1237+
void
1238+
rb_node_buffer_free(node_buffer_t *nb)
1239+
{
1240+
node_buffer_elem_t *nbe = nb->head;
1241+
1242+
while (nbe != &nb->body) {
1243+
void *buf = nbe;
1244+
nbe = nbe->next;
1245+
xfree(buf);
1246+
}
1247+
xfree(nb);
1248+
}
1249+
1250+
NODE *
1251+
rb_ast_newnode(ast_t *ast)
1252+
{
1253+
node_buffer_t *nb = ast->node_buffer;
1254+
if (nb->idx >= nb->len) {
1255+
long n = nb->len * 2;
1256+
node_buffer_elem_t *nbe;
1257+
nbe = xmalloc(sizeof(node_buffer_elem_t) + n * sizeof(NODE));
1258+
nb->idx = 0;
1259+
nb->len = n;
1260+
nbe->next = nb->head;
1261+
nb->head = nbe;
1262+
}
1263+
return &nb->head->buf[nb->idx++];
1264+
}
1265+
1266+
void
1267+
rb_ast_delete_node(ast_t *ast, NODE *n)
1268+
{
1269+
(void)ast;
1270+
(void)n;
1271+
/* should we implement freelist? */
1272+
}
1273+
1274+
ast_t *
1275+
rb_ast_new(void)
1276+
{
1277+
return (ast_t *)rb_imemo_new(imemo_ast, 0, (VALUE)rb_node_buffer_new(), rb_ary_tmp_new(0), 0);
1278+
}
1279+
1280+
void
1281+
rb_ast_mark(ast_t *ast)
1282+
{
1283+
if (ast->node_buffer) rb_gc_mark(ast->mark_ary);
1284+
}
1285+
1286+
void
1287+
rb_ast_free(ast_t *ast)
1288+
{
1289+
if (ast->node_buffer) rb_node_buffer_free(ast->node_buffer);
1290+
ast->node_buffer = 0;
1291+
ast->root = 0;
1292+
ast->mark_ary = 0;
1293+
}
1294+
1295+
void
1296+
rb_ast_dispose(ast_t *ast)
1297+
{
1298+
rb_ast_free(ast);
1299+
rb_gc_writebarrier_remember((VALUE)ast);
1300+
}
1301+
1302+
void
1303+
rb_ast_add_mark_object(ast_t *ast, VALUE obj)
1304+
{
1305+
rb_ary_push(ast->mark_ary, obj);
1306+
}
1307+
1308+
void
1309+
rb_ast_delete_mark_object(ast_t *ast, VALUE obj)
1310+
{
1311+
long i;
1312+
for (i = 0; i < RARRAY_LEN(ast->mark_ary); i++) {
1313+
if (obj == RARRAY_AREF(ast->mark_ary, i)) {
1314+
RARRAY_ASET(ast->mark_ary, i, Qnil);
1315+
}
1316+
}
1317+
}

node.h

+26-8
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,24 @@ typedef struct RNode {
439439

440440
RUBY_SYMBOL_EXPORT_BEGIN
441441

442+
typedef struct node_buffer_struct node_buffer_t;
443+
/* T_IMEMO/ast */
444+
typedef struct ast_struct {
445+
VALUE flags;
446+
VALUE reserved1;
447+
NODE *root;
448+
node_buffer_t *node_buffer;
449+
VALUE mark_ary;
450+
} ast_t;
451+
ast_t *rb_ast_new();
452+
void rb_ast_mark(ast_t*);
453+
void rb_ast_dispose(ast_t*);
454+
void rb_ast_free(ast_t*);
455+
void rb_ast_add_mark_object(ast_t*, VALUE);
456+
void rb_ast_delete_mark_object(ast_t*, VALUE);
457+
NODE *rb_ast_newnode(ast_t*);
458+
void rb_ast_delete_node(ast_t*, NODE *n);
459+
442460
VALUE rb_parser_new(void);
443461
VALUE rb_parser_end_seen_p(VALUE);
444462
VALUE rb_parser_encoding(VALUE);
@@ -447,15 +465,15 @@ VALUE rb_parser_set_yydebug(VALUE, VALUE);
447465
VALUE rb_parser_dump_tree(NODE *node, int comment);
448466
void rb_parser_set_options(VALUE, int, int, int, int);
449467

450-
NODE *rb_parser_compile_cstr(VALUE, const char*, const char*, int, int);
451-
NODE *rb_parser_compile_string(VALUE, const char*, VALUE, int);
452-
NODE *rb_parser_compile_file(VALUE, const char*, VALUE, int);
453-
NODE *rb_parser_compile_string_path(VALUE vparser, VALUE fname, VALUE src, int line);
454-
NODE *rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE input, int line);
468+
ast_t *rb_parser_compile_cstr(VALUE, const char*, const char*, int, int);
469+
ast_t *rb_parser_compile_string(VALUE, const char*, VALUE, int);
470+
ast_t *rb_parser_compile_file(VALUE, const char*, VALUE, int);
471+
ast_t *rb_parser_compile_string_path(VALUE vparser, VALUE fname, VALUE src, int line);
472+
ast_t *rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE input, int line);
455473

456-
NODE *rb_compile_cstr(const char*, const char*, int, int);
457-
NODE *rb_compile_string(const char*, VALUE, int);
458-
NODE *rb_compile_file(const char*, VALUE, int);
474+
ast_t *rb_compile_cstr(const char*, const char*, int, int);
475+
ast_t *rb_compile_string(const char*, VALUE, int);
476+
ast_t *rb_compile_file(const char*, VALUE, int);
459477

460478
void rb_node_init(NODE *n, enum node_type type, VALUE a0, VALUE a1, VALUE a2);
461479
NODE *rb_node_newnode(enum node_type,VALUE,VALUE,VALUE);

0 commit comments

Comments
 (0)