Skip to content

Commit

Permalink
[ruby/yarp] Fix heredocs inside %W and %w lists
Browse files Browse the repository at this point in the history
The problem was that we were treating heredoc bodies as part of the %W
list because we didn't push the scanning cursor past the heredoc after
lexing out the here doc.  To fix this, we changed the whitespace
scanning function to quit scanning when it reaches a newline but only in
the case that a heredoc is present.

Additionally, we need to prevent double counting newlines in the case of
a heredoc.  For example:

```ruby
%W(<<foo 123)
foo
```

The newline after the `)` is counted as part of scanning the heredoc, so
we added logic to prevent double counting the newline when scanning the
rest of the %W list.

ruby/prism@eb090d8126

Co-authored-by: Jemma Issroff <jemmaissroff@gmail.com>
  • Loading branch information
2 people authored and matzbot committed Jul 20, 2023
1 parent 5c219c1 commit abce858
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 7 deletions.
28 changes: 28 additions & 0 deletions test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
ProgramNode(0...30)(
[],
StatementsNode(0...30)(
[ArrayNode(0...30)(
[StringNode(4...5)(nil, (4...5), nil, "1"),
InterpolatedStringNode(0...12)(
nil,
[EmbeddedStatementsNode(6...12)(
(6...8),
StatementsNode(8...19)(
[InterpolatedStringNode(8...19)(
(8...11),
[StringNode(15...17)(nil, (15...17), nil, "2\n")],
(17...19)
)]
),
(11...12)
)],
nil
),
StringNode(13...14)(nil, (13...14), nil, "3"),
StringNode(25...26)(nil, (25...26), nil, "4"),
StringNode(27...28)(nil, (27...28), nil, "5")],
(0...3),
(29...30)
)]
)
)
1 change: 0 additions & 1 deletion test/yarp/parse_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ def test_empty_string

known_failures = %w[
seattlerb/heredoc_nested.txt
seattlerb/pct_w_heredoc_interp_nested.txt
]

def find_source_file_node(node)
Expand Down
9 changes: 7 additions & 2 deletions yarp/util/yp_char.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,15 +75,20 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) {
// whitespace while also tracking the location of each newline. Disallows
// searching past the given maximum number of characters.
size_t
yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list) {
yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool stop_at_newline) {
if (length <= 0) return 0;

size_t size = 0;
size_t maximum = (size_t) length;

while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) {
if (string[size] == '\n') {
yp_newline_list_append(newline_list, string + size);
if (stop_at_newline) {
return size + 1;
}
else {
yp_newline_list_append(newline_list, string + size);
}
}

size++;
Expand Down
2 changes: 1 addition & 1 deletion yarp/util/yp_char.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ size_t yp_strspn_whitespace(const char *string, ptrdiff_t length);
// whitespace while also tracking the location of each newline. Disallows
// searching past the given maximum number of characters.
size_t
yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list);
yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool);

// Returns the number of characters at the start of the string that are inline
// whitespace. Disallows searching past the given maximum number of characters.
Expand Down
6 changes: 4 additions & 2 deletions yarp/util/yp_newline_list.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity
bool
yp_newline_list_append(yp_newline_list_t *list, const char *cursor) {
if (list->size == list->capacity) {
list->capacity = list->capacity * 3 / 2;
list->capacity = (list->capacity * 3) / 2;
list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t));
if (list->offsets == NULL) return false;
}

assert(cursor >= list->start);
list->offsets[list->size++] = (size_t) (cursor - list->start + 1);
size_t newline_offset = (size_t) (cursor - list->start + 1);
assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]);
list->offsets[list->size++] = newline_offset;

return true;
}
Expand Down
14 changes: 13 additions & 1 deletion yarp/yarp.c
Original file line number Diff line number Diff line change
Expand Up @@ -6505,14 +6505,26 @@ parser_lex(yp_parser_t *parser) {
}
}
case YP_LEX_LIST:
if (parser->next_start != NULL) {
parser->current.end = parser->next_start;
parser->next_start = NULL;
}

// First we'll set the beginning of the token.
parser->current.start = parser->current.end;

// If there's any whitespace at the start of the list, then we're
// going to trim it off the beginning and create a new token.
size_t whitespace;
if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list)) > 0) {

bool should_stop = parser->heredoc_end;

if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) {
parser->current.end += whitespace;
if (parser->current.end[-1] == '\n') {
// mutates next_start
parser_flush_heredoc_end(parser);
}
LEX(YP_TOKEN_WORDS_SEP);
}

Expand Down

0 comments on commit abce858

Please sign in to comment.