Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions lib/prism/lex_compat.rb
Original file line number Diff line number Diff line change
Expand Up @@ -768,21 +768,24 @@ def result
source.byte_offset(line, column)
end

# Add :on_sp tokens
tokens = insert_on_sp(tokens, source, result.data_loc, bom, eof_token)
tokens = post_process_tokens(tokens, source, result.data_loc, bom, eof_token)

Result.new(tokens, result.comments, result.magic_comments, result.data_loc, result.errors, result.warnings, source)
end

private

def insert_on_sp(tokens, source, data_loc, bom, eof_token)
def post_process_tokens(tokens, source, data_loc, bom, eof_token)
new_tokens = []

prev_token_state = Translation::Ripper::Lexer::State[Translation::Ripper::EXPR_BEG]
prev_token_end = bom ? 3 : 0

tokens.each do |token|
# Skip missing heredoc ends.
next if token[1] == :on_heredoc_end && token[2] == ""

# Add :on_sp tokens.
line, column = token[0]
start_offset = source.byte_offset(line, column)

Expand Down
9 changes: 9 additions & 0 deletions src/prism.c
Original file line number Diff line number Diff line change
Expand Up @@ -9856,6 +9856,15 @@ parser_lex(pm_parser_t *parser) {
// We'll check if we're at the end of the file. If we are, then we
// need to return the EOF token.
if (parser->current.end >= parser->end) {
// We may be missing closing tokens. We should pop modes one by one
// to do the appropriate cleanup like moving next_start for heredocs.
// Only when no mode is remaining will we actually emit the EOF token.
if (parser->lex_modes.current->mode != PM_LEX_DEFAULT) {
lex_mode_pop(parser);
parser_lex(parser);
return;
}

// If we hit EOF, but the EOF came immediately after a newline,
// set the start of the token to the newline. This way any EOF
// errors will be reported as happening on that line rather than
Expand Down
11 changes: 11 additions & 0 deletions test/prism/errors/unterminated_heredoc_and_embexpr.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<<A+B
^ unterminated heredoc; can't find string "A" anywhere before EOF
^ unexpected '+', ignoring it
^ unterminated heredoc; can't find string "A" anywhere before EOF
#{C
^ unexpected heredoc ending; expected an argument
^ unexpected heredoc ending, expecting end-of-input
^ unexpected heredoc ending, ignoring it
^ unexpected end-of-input, assuming it is closing the parent top level context
^ expected a `}` to close the embedded expression

9 changes: 9 additions & 0 deletions test/prism/errors/unterminated_heredoc_and_embexpr_2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<<A+B
^ unterminated heredoc; can't find string "A" anywhere before EOF
#{C + "#{"}
^ unterminated string meets end of file
^ unexpected end-of-input, assuming it is closing the parent top level context
^ expected a `}` to close the embedded expression
^ unterminated string; expected a closing delimiter for the interpolated string
^ expected a `}` to close the embedded expression

53 changes: 50 additions & 3 deletions test/prism/lex_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,58 @@ def test_parse_lex_file
end

if RUBY_VERSION >= "3.3"
def test_lex_compare
prism = Prism.lex_compat(File.read(__FILE__), version: "current").value
ripper = Ripper.lex(File.read(__FILE__))
def test_lex_compat
source = "foo bar"
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added heredocs below for which Ripper doesn't compute the lexer state correctly, so this test now lexes a different, fixed source instead of this file.

prism = Prism.lex_compat(source, version: "current").value
ripper = Ripper.lex(source)
assert_equal(ripper, prism)
end
end

# Checks the token types reported for a string whose interpolation is opened
# but never closed, first without and then with a trailing newline.
def test_lex_interpolation_unterminated
  unterminated = '"#{'

  assert_equal(%i[STRING_BEGIN EMBEXPR_BEGIN EOF], token_types(unterminated))

  expected_with_newline = %i[STRING_BEGIN EMBEXPR_BEGIN IGNORED_NEWLINE EOF]
  assert_equal(expected_with_newline, token_types("#{unterminated}\n"))
end

# Checks the token types reported for an unterminated interpolation that
# already contains a constant, first without and then with a trailing newline.
def test_lex_interpolation_unterminated_with_content
  unterminated = '"#{C'

  # FIXME: Emits EOF twice.
  assert_equal(%i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT EOF EOF], token_types(unterminated))

  expected_with_newline = %i[STRING_BEGIN EMBEXPR_BEGIN CONSTANT NEWLINE EOF]
  assert_equal(expected_with_newline, token_types("#{unterminated}\n"))
end

# Checks the token types reported for a heredoc that is never terminated and
# whose body opens an interpolation, first without and then with a trailing
# newline after the body.
def test_lex_heredoc_unterminated
  # Two lines: the heredoc opener followed by "+B", then an unclosed "#{C".
  source = ['<<A+B', '#{C'].join("\n")

  without_newline = %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT HEREDOC_END PLUS CONSTANT NEWLINE EOF]
  assert_equal(without_newline, token_types(source))

  with_newline = %i[HEREDOC_START EMBEXPR_BEGIN CONSTANT NEWLINE HEREDOC_END PLUS CONSTANT NEWLINE EOF]
  assert_equal(with_newline, token_types("#{source}\n"))
end

# Lexes +code+ with Prism.lex and returns the +type+ of each token in order.
# Each lexed entry is destructured as a pair; its second element is ignored.
def token_types(code)
  lexed = Prism.lex(code).value
  lexed.map do |(lexed_token, _lex_state)|
    lexed_token.type
  end
end
end
end
10 changes: 10 additions & 0 deletions test/prism/ruby/ripper_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,16 @@ class RipperTest < TestCase
define_method("#{fixture.test_name}_lex") { assert_ripper_lex(fixture.read) }
end

# Runs assert_ripper_lex on heredoc openers with no body and no terminator,
# for each heredoc flavour (plain, "-", "~"), with the identifier first bare
# and then single-quoted.
def test_lex_ignored_missing_heredoc_end
  ["", "-", "~"].each do |modifier|
    ["FOO", "'FOO'"].each do |identifier|
      assert_ripper_lex("<<#{modifier}#{identifier}\n")
    end
  end
end

module Events
attr_reader :events

Expand Down