Skip to content

Avoid eating """ as closing " delimiter #3116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 64 additions & 52 deletions Sources/SwiftParser/Lexer/Cursor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ extension Lexer.Cursor {

/// The lexer has finished lexing the contents of a string literal and is now
/// looking for the closing quote.
case afterStringLiteral(isRawString: Bool)
case afterStringLiteral(kind: StringLiteralKind, isRawString: Bool)

/// The lexer has lexed the closing quote of a string literal that had raw
/// string delimiters and is now looking for the closing raw string delimiters.
Expand Down Expand Up @@ -453,8 +453,8 @@ extension Lexer.Cursor {
result = lexAfterRawStringDelimiter(delimiterLength: delimiterLength)
case .inStringLiteral(kind: let stringLiteralKind, delimiterLength: let delimiterLength):
result = lexInStringLiteral(stringLiteralKind: stringLiteralKind, delimiterLength: delimiterLength)
case .afterStringLiteral(isRawString: _):
result = lexAfterStringLiteral()
case .afterStringLiteral(kind: let stringLiteralKind, isRawString: _):
result = lexAfterStringLiteral(stringLiteralKind: stringLiteralKind)
case .afterClosingStringQuote:
result = lexAfterClosingStringQuote()
case .inStringInterpolationStart(stringLiteralKind: let stringLiteralKind):
Expand Down Expand Up @@ -998,7 +998,7 @@ extension Lexer.Cursor {
case "0", "1", "2", "3", "4", "5", "6", "7", "8", "9":
return self.lexNumber()
case #"'"#, #"""#:
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: 0)
return self.lexStringQuote(matchingOpening: nil, leadingDelimiterLength: 0)

case "`":
return self.lexEscapedIdentifier()
Expand Down Expand Up @@ -1029,18 +1029,18 @@ extension Lexer.Cursor {
private mutating func lexAfterRawStringDelimiter(delimiterLength: Int) -> Lexer.Result {
switch self.peek() {
case #"'"#, #"""#:
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: delimiterLength)
return self.lexStringQuote(matchingOpening: nil, leadingDelimiterLength: delimiterLength)
case nil:
return Lexer.Result(.endOfFile)
default:
preconditionFailure("state 'afterRawStringDelimiter' expects to be positioned at a quote")
}
}

private mutating func lexAfterStringLiteral() -> Lexer.Result {
private mutating func lexAfterStringLiteral(stringLiteralKind: StringLiteralKind) -> Lexer.Result {
switch self.peek() {
case #"'"#, #"""#:
return self.lexStringQuote(isOpening: false, leadingDelimiterLength: 0)
return self.lexStringQuote(matchingOpening: stringLiteralKind, leadingDelimiterLength: 0)
case nil:
return Lexer.Result(.endOfFile)
default:
Expand Down Expand Up @@ -1796,9 +1796,9 @@ extension Lexer.Cursor {
extension Lexer.Cursor {
private func stateTransitionAfterLexingStringQuote(kind: StringLiteralKind) -> Lexer.StateTransition {
switch currentState {
case .afterStringLiteral(isRawString: true):
case .afterStringLiteral(kind: _, isRawString: true):
return .replace(newState: .afterClosingStringQuote)
case .afterStringLiteral(isRawString: false):
case .afterStringLiteral(kind: _, isRawString: false):
return .pop
case .afterRawStringDelimiter(delimiterLength: let delimiterLength):
return .replace(newState: .inStringLiteral(kind: kind, delimiterLength: delimiterLength))
Expand All @@ -1809,62 +1809,70 @@ extension Lexer.Cursor {
}
}

/// `isOpening` is `true` if this string quote is the opening quote of a string
/// literal and `false` if we are lexing the closing quote of a string literal.
mutating func lexStringQuote(isOpening: Bool, leadingDelimiterLength: Int) -> Lexer.Result {
/// `matchingOpening` is the opening literal kind if this string quote is the
/// closing quote of a string literal, `nil` if it's the opening quote.
mutating func lexStringQuote(
matchingOpening: StringLiteralKind?,
leadingDelimiterLength: Int
) -> Lexer.Result {
if self.advance(matching: "'") {
return Lexer.Result(.singleQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleQuote))
}

let firstQuoteConsumed = self.advance(matching: #"""#)
precondition(firstQuoteConsumed)

// Check to see if we have a multi-line delimiter. If we're matching an
// opening '"' then we want to bail since e.g `"a"""` shouldn't try to eat
// the '"""' as its closing delimiter.
var lookingForMultilineString = self
if lookingForMultilineString.advance(matching: #"""#), lookingForMultilineString.advance(matching: #"""#) {
if leadingDelimiterLength > 0 {
// If this is a string literal, check if we have the closing delimiter on the same line to correctly parse things like `#"""#` as a single line string containing a quote.
var isSingleLineString = lookingForMultilineString

if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
// If we have the correct number of delimiters now, we have something like `#"""#`.
// This is a single-line string.
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
}
if matchingOpening == .singleLine
|| !(lookingForMultilineString.advance(matching: #"""#) && lookingForMultilineString.advance(matching: #"""#))
{
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
}

// Scan ahead until the end of the line. Every time we see a closing
// quote, check if it is followed by the correct number of closing delimiters.
while isSingleLineString.is(notAt: "\r", "\n") {
if isSingleLineString.advance(if: { $0 == #"""# }) {
if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
return Lexer.Result(
.stringQuote,
stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine)
)
}
continue
}
_ = isSingleLineString.advance()
}
if leadingDelimiterLength > 0 {
// If this is a string literal, check if we have the closing delimiter on the same line to correctly parse things like `#"""#` as a single line string containing a quote.
var isSingleLineString = lookingForMultilineString

if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
// If we have the correct number of delimiters now, we have something like `#"""#`.
// This is a single-line string.
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
}

self = lookingForMultilineString
let trailingTriviaLexingMode: TriviaLexingMode?
if isOpening && self.is(at: "\n", "\r") {
// The opening quote of a multi-line string literal must be followed by
// a newline that's not part of the represented string.
trailingTriviaLexingMode = .escapedNewlineInMultiLineStringLiteral
} else {
trailingTriviaLexingMode = nil
// Scan ahead until the end of the line. Every time we see a closing
// quote, check if it is followed by the correct number of closing delimiters.
while isSingleLineString.is(notAt: "\r", "\n") {
if isSingleLineString.advance(if: { $0 == #"""# }) {
if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
return Lexer.Result(
.stringQuote,
stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine)
)
}
continue
}
_ = isSingleLineString.advance()
}
}

return Lexer.Result(
.multilineStringQuote,
stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine),
trailingTriviaLexingMode: trailingTriviaLexingMode
)
self = lookingForMultilineString
let trailingTriviaLexingMode: TriviaLexingMode?
if matchingOpening == nil && self.is(at: "\n", "\r") {
// The opening quote of a multi-line string literal must be followed by
// a newline that's not part of the represented string.
trailingTriviaLexingMode = .escapedNewlineInMultiLineStringLiteral
} else {
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
trailingTriviaLexingMode = nil
}

return Lexer.Result(
.multilineStringQuote,
stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine),
trailingTriviaLexingMode: trailingTriviaLexingMode
)
}

/// Returns `true` if the cursor is positioned at `\##(` with `delimiterLength`
Expand Down Expand Up @@ -1935,7 +1943,9 @@ extension Lexer.Cursor {
return Lexer.Result(
.stringSegment,
error: error,
stateTransition: .replace(newState: .afterStringLiteral(isRawString: delimiterLength > 0))
stateTransition: .replace(
newState: .afterStringLiteral(kind: stringLiteralKind, isRawString: delimiterLength > 0)
)
)
default:
break
Expand Down Expand Up @@ -1967,7 +1977,9 @@ extension Lexer.Cursor {
return Lexer.Result(
.stringSegment,
error: error,
stateTransition: .replace(newState: .afterStringLiteral(isRawString: delimiterLength > 0))
stateTransition: .replace(
newState: .afterStringLiteral(kind: stringLiteralKind, isRawString: delimiterLength > 0)
)
)
}
}
Expand Down
56 changes: 56 additions & 0 deletions Tests/SwiftParserTest/ExpressionTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1882,6 +1882,62 @@ final class ExpressionTests: ParserTestCase {
)
}

/// Checks how the parser handles a single-line string literal that is
/// immediately followed by more quotes, as in `"a"""`.
///
/// The expected diagnostics and `fixedSource` below describe the input being
/// split into `"a"`, `""`, two `a` statements, and a trailing `"""` that is
/// reported as a multi-line string literal missing its closing delimiter.
/// In other words, the `"""` after `"a` is not consumed as a closing delimiter.
func testInvalidMultiLineClosingDelimiter() {
assertParse(
#"""
"a"1️⃣""2️⃣ a3️⃣ a4️⃣ℹ️"""5️⃣
"""#,
diagnostics: [
// Markers 1-4: each piece following `"a"` is diagnosed as a separate
// statement sharing the same line.
DiagnosticSpec(
locationMarker: "1️⃣",
message: "consecutive statements on a line must be separated by newline or ';'",
fixIts: [
"insert newline", "insert ';'",
]
),
DiagnosticSpec(
locationMarker: "2️⃣",
message: "consecutive statements on a line must be separated by newline or ';'",
fixIts: [
"insert newline", "insert ';'",
]
),
DiagnosticSpec(
locationMarker: "3️⃣",
message: "consecutive statements on a line must be separated by newline or ';'",
fixIts: [
"insert newline", "insert ';'",
]
),
DiagnosticSpec(
locationMarker: "4️⃣",
message: "consecutive statements on a line must be separated by newline or ';'",
fixIts: [
"insert newline", "insert ';'",
]
),
// Marker 5: the trailing `"""` is expected to be diagnosed as an opening
// multi-line delimiter with no matching closing delimiter.
DiagnosticSpec(
locationMarker: "5️⃣",
message: #"expected '"""' to end string literal"#,
notes: [
NoteSpec(message: #"to match this opening '"""'"#)
],
fixIts: [
#"insert '"""'"#
]
),
],
// Expected source after applying the fix-its listed above.
fixedSource: #"""
"a"
""
a
a
"""
"""
"""#
)
}

func testEmptyLineInMultilineStringLiteral() {
assertParse(
#"""
Expand Down
Loading