From abce8583e253e96cf1268926ee7fd790f980ea96 Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Tue, 18 Jul 2023 14:37:26 -0700 Subject: [PATCH] [ruby/yarp] Fix heredocs inside %W and %w lists The problem was that we were treating heredoc bodies as part of the %W list because we didn't push the scanning cursor past the heredoc after lexing out the heredoc. To fix this, we changed the whitespace scanning function to quit scanning when it reaches a newline but only in the case that a heredoc is present. Additionally, we need to prevent double counting newlines in the case of a heredoc. For example: ```ruby %W(< --- .../seattlerb/pct_w_heredoc_interp_nested.txt | 28 +++++++++++++++++++ test/yarp/parse_test.rb | 1 - yarp/util/yp_char.c | 9 ++++-- yarp/util/yp_char.h | 2 +- yarp/util/yp_newline_list.c | 6 ++-- yarp/yarp.c | 14 +++++++++- 6 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt diff --git a/test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt b/test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt new file mode 100644 index 00000000000000..89ce74ce19627d --- /dev/null +++ b/test/snapshots/seattlerb/pct_w_heredoc_interp_nested.txt @@ -0,0 +1,28 @@ +ProgramNode(0...30)( + [], + StatementsNode(0...30)( + [ArrayNode(0...30)( + [StringNode(4...5)(nil, (4...5), nil, "1"), + InterpolatedStringNode(0...12)( + nil, + [EmbeddedStatementsNode(6...12)( + (6...8), + StatementsNode(8...19)( + [InterpolatedStringNode(8...19)( + (8...11), + [StringNode(15...17)(nil, (15...17), nil, "2\n")], + (17...19) + )] + ), + (11...12) + )], + nil + ), + StringNode(13...14)(nil, (13...14), nil, "3"), + StringNode(25...26)(nil, (25...26), nil, "4"), + StringNode(27...28)(nil, (27...28), nil, "5")], + (0...3), + (29...30) + )] + ) +) diff --git a/test/yarp/parse_test.rb b/test/yarp/parse_test.rb index 3eff7d447f222e..b6020b82dbca72 100644 --- a/test/yarp/parse_test.rb +++ b/test/yarp/parse_test.rb @@ 
-28,7 +28,6 @@ def test_empty_string known_failures = %w[ seattlerb/heredoc_nested.txt - seattlerb/pct_w_heredoc_interp_nested.txt ] def find_source_file_node(node) diff --git a/yarp/util/yp_char.c b/yarp/util/yp_char.c index 9befcb51053b82..1c0c20edd92083 100644 --- a/yarp/util/yp_char.c +++ b/yarp/util/yp_char.c @@ -75,7 +75,7 @@ yp_strspn_whitespace(const char *string, ptrdiff_t length) { // whitespace while also tracking the location of each newline. Disallows // searching past the given maximum number of characters. size_t -yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list) { +yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool stop_at_newline) { if (length <= 0) return 0; size_t size = 0; @@ -83,7 +83,12 @@ yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t while (size < maximum && (yp_char_table[(unsigned char) string[size]] & YP_CHAR_BIT_WHITESPACE)) { if (string[size] == '\n') { - yp_newline_list_append(newline_list, string + size); + if (stop_at_newline) { + return size + 1; + } + else { + yp_newline_list_append(newline_list, string + size); + } } size++; diff --git a/yarp/util/yp_char.h b/yarp/util/yp_char.h index 85e5ce4c656107..dcc011f0a12f17 100644 --- a/yarp/util/yp_char.h +++ b/yarp/util/yp_char.h @@ -15,7 +15,7 @@ size_t yp_strspn_whitespace(const char *string, ptrdiff_t length); // whitespace while also tracking the location of each newline. Disallows // searching past the given maximum number of characters. size_t -yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list); +yp_strspn_whitespace_newlines(const char *string, long length, yp_newline_list_t *newline_list, bool); // Returns the number of characters at the start of the string that are inline // whitespace. Disallows searching past the given maximum number of characters. 
diff --git a/yarp/util/yp_newline_list.c b/yarp/util/yp_newline_list.c index c619e83c92e405..8b24f82a0248ea 100644 --- a/yarp/util/yp_newline_list.c +++ b/yarp/util/yp_newline_list.c @@ -25,13 +25,15 @@ yp_newline_list_init(yp_newline_list_t *list, const char *start, size_t capacity bool yp_newline_list_append(yp_newline_list_t *list, const char *cursor) { if (list->size == list->capacity) { - list->capacity = list->capacity * 3 / 2; + list->capacity = (list->capacity * 3) / 2; list->offsets = (size_t *) realloc(list->offsets, list->capacity * sizeof(size_t)); if (list->offsets == NULL) return false; } assert(cursor >= list->start); - list->offsets[list->size++] = (size_t) (cursor - list->start + 1); + size_t newline_offset = (size_t) (cursor - list->start + 1); + assert(list->size == 0 || newline_offset > list->offsets[list->size - 1]); + list->offsets[list->size++] = newline_offset; return true; } diff --git a/yarp/yarp.c b/yarp/yarp.c index c80aa5499a0de8..2ae3b3ea28f1f9 100644 --- a/yarp/yarp.c +++ b/yarp/yarp.c @@ -6505,14 +6505,26 @@ parser_lex(yp_parser_t *parser) { } } case YP_LEX_LIST: + if (parser->next_start != NULL) { + parser->current.end = parser->next_start; + parser->next_start = NULL; + } + // First we'll set the beginning of the token. parser->current.start = parser->current.end; // If there's any whitespace at the start of the list, then we're // going to trim it off the beginning and create a new token. size_t whitespace; - if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list)) > 0) { + + bool should_stop = parser->heredoc_end; + + if ((whitespace = yp_strspn_whitespace_newlines(parser->current.end, parser->end - parser->current.end, &parser->newline_list, should_stop)) > 0) { parser->current.end += whitespace; + if (parser->current.end[-1] == '\n') { + // mutates next_start + parser_flush_heredoc_end(parser); + } LEX(YP_TOKEN_WORDS_SEP); }