diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index e8d2ce1b19..3d5cbfcddc 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -768,21 +768,24 @@ def result
source.byte_offset(line, column)
end
- # Add :on_sp tokens
- tokens = insert_on_sp(tokens, source, result.data_loc, bom, eof_token)
+ tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token)
Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
end
private
- def insert_on_sp(tokens, source, data_loc, bom, eof_token)
+ def post_process_tokens(tokens, source, data_loc, bom, eof_token)
new_tokens = []
prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG]
prev_token_end = bom ? 3 : 0
tokens.each do |token|
+ # Skip missing heredoc ends.
+ next if token[1] == :on_heredoc_end && token[2] == ""
+
+ # Add :on_sp tokens.
line, column = token[0]
start_offset = source.byte_offset(line, column)
diff --git a/src/prism.c b/src/prism.c
index 34e5d38b0a..097b4c7305 100644
--- a/src/prism.c
+++ b/src/prism.c
@@ -9856,6 +9856,15 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
+ // We may be missing closing tokens. We should pop modes one by one
+ // to do the appropriate cleanup like moving next_start for heredocs.
+ // Only when no mode is remaining will we actually emit the EOF token.
+ if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
+ lex_mode_pop(parser);
+ parser_lex(parser);
+ return;
+ }
+
// If we hit EOF, but the EOF came immediately after a newline,
// set the start of the token to the newline. This way any EOF
// errors will be reported as happening on that line rather than
diff --git a/test/prism/errors/unterminated_heredoc_and_embexpr.txt b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
new file mode 100644
index 0000000000..bed7fcd24e
--- /dev/null
+++ b/test/prism/errors/unterminated_heredoc_and_embexpr.txt
@@ -0,0 +1,11 @@
+<= "3.3"
- def test_lex_compare
- prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
- ripper = Ripper.lex(File.read(__FILE__))
+ def test_lex_compat
+ source = "foo bar"
+ prism = Prism.lex_compat(source, version: "current").value
+ ripper = Ripper.lex(source)
assert_equal(ripper, prism)
end
end
+
+ def test_lex_interpolation_unterminated
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN EOF],
+ token_types('"#{')
+ )
+
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF],
+ token_types('"#{' + "\n")
+ )
+ end
+
+ def test_lex_interpolation_unterminated_with_content
+ # FIXME: Emits EOL twice.
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF],
+ token_types('"#{C')
+ )
+
+ assert_equal(
+ %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF],
+ token_types('"#{C' + "\n")
+ )
+ end
+
+ def test_lex_heredoc_unterminated
+ code = <<~'RUBY'.strip
+ <