Skip to content

Commit b73055f

Browse files
authored
Merge pull request #3116 from hamishknight/air-quotes
Avoid eating `"""` as closing `"` delimiter
2 parents c40a978 + 28c2a43 commit b73055f

File tree

2 files changed

+120
-52
lines changed

2 files changed

+120
-52
lines changed

Sources/SwiftParser/Lexer/Cursor.swift

Lines changed: 64 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ extension Lexer.Cursor {
7979

8080
/// The lexer has finished lexing the contents of a string literal and is now
8181
/// looking for the closing quote.
82-
case afterStringLiteral(isRawString: Bool)
82+
case afterStringLiteral(kind: StringLiteralKind, isRawString: Bool)
8383

8484
/// The lexer has lexed the closing quote of a string literal that had raw
8585
/// string delimiters and is now looking for the closing raw string delimiters.
@@ -453,8 +453,8 @@ extension Lexer.Cursor {
453453
result = lexAfterRawStringDelimiter(delimiterLength: delimiterLength)
454454
case .inStringLiteral(kind: let stringLiteralKind, delimiterLength: let delimiterLength):
455455
result = lexInStringLiteral(stringLiteralKind: stringLiteralKind, delimiterLength: delimiterLength)
456-
case .afterStringLiteral(isRawString: _):
457-
result = lexAfterStringLiteral()
456+
case .afterStringLiteral(kind: let stringLiteralKind, isRawString: _):
457+
result = lexAfterStringLiteral(stringLiteralKind: stringLiteralKind)
458458
case .afterClosingStringQuote:
459459
result = lexAfterClosingStringQuote()
460460
case .inStringInterpolationStart(stringLiteralKind: let stringLiteralKind):
@@ -998,7 +998,7 @@ extension Lexer.Cursor {
998998
case "0", "1", "2", "3", "4", "5", "6", "7", "8", "9":
999999
return self.lexNumber()
10001000
case #"'"#, #"""#:
1001-
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: 0)
1001+
return self.lexStringQuote(matchingOpening: nil, leadingDelimiterLength: 0)
10021002

10031003
case "`":
10041004
return self.lexEscapedIdentifier()
@@ -1029,18 +1029,18 @@ extension Lexer.Cursor {
10291029
private mutating func lexAfterRawStringDelimiter(delimiterLength: Int) -> Lexer.Result {
10301030
switch self.peek() {
10311031
case #"'"#, #"""#:
1032-
return self.lexStringQuote(isOpening: true, leadingDelimiterLength: delimiterLength)
1032+
return self.lexStringQuote(matchingOpening: nil, leadingDelimiterLength: delimiterLength)
10331033
case nil:
10341034
return Lexer.Result(.endOfFile)
10351035
default:
10361036
preconditionFailure("state 'afterRawStringDelimiter' expects to be positioned at a quote")
10371037
}
10381038
}
10391039

1040-
private mutating func lexAfterStringLiteral() -> Lexer.Result {
1040+
private mutating func lexAfterStringLiteral(stringLiteralKind: StringLiteralKind) -> Lexer.Result {
10411041
switch self.peek() {
10421042
case #"'"#, #"""#:
1043-
return self.lexStringQuote(isOpening: false, leadingDelimiterLength: 0)
1043+
return self.lexStringQuote(matchingOpening: stringLiteralKind, leadingDelimiterLength: 0)
10441044
case nil:
10451045
return Lexer.Result(.endOfFile)
10461046
default:
@@ -1796,9 +1796,9 @@ extension Lexer.Cursor {
17961796
extension Lexer.Cursor {
17971797
private func stateTransitionAfterLexingStringQuote(kind: StringLiteralKind) -> Lexer.StateTransition {
17981798
switch currentState {
1799-
case .afterStringLiteral(isRawString: true):
1799+
case .afterStringLiteral(kind: _, isRawString: true):
18001800
return .replace(newState: .afterClosingStringQuote)
1801-
case .afterStringLiteral(isRawString: false):
1801+
case .afterStringLiteral(kind: _, isRawString: false):
18021802
return .pop
18031803
case .afterRawStringDelimiter(delimiterLength: let delimiterLength):
18041804
return .replace(newState: .inStringLiteral(kind: kind, delimiterLength: delimiterLength))
@@ -1809,62 +1809,70 @@ extension Lexer.Cursor {
18091809
}
18101810
}
18111811

1812-
/// `isOpening` is `true` if this string quote is the opening quote of a string
1813-
/// literal and `false` if we are lexing the closing quote of a string literal.
1814-
mutating func lexStringQuote(isOpening: Bool, leadingDelimiterLength: Int) -> Lexer.Result {
1812+
/// `matchingOpening` is the opening literal kind if this string quote is the
1813+
/// closing quote of a string literal, `nil` if it's the opening quote.
1814+
mutating func lexStringQuote(
1815+
matchingOpening: StringLiteralKind?,
1816+
leadingDelimiterLength: Int
1817+
) -> Lexer.Result {
18151818
if self.advance(matching: "'") {
18161819
return Lexer.Result(.singleQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleQuote))
18171820
}
18181821

18191822
let firstQuoteConsumed = self.advance(matching: #"""#)
18201823
precondition(firstQuoteConsumed)
18211824

1825+
// Check to see if we have a multi-line delimiter. If we're matching an
1826+
// opening '"' then we want to bail since e.g. `"a"""` shouldn't try to eat
1827+
// the '"""' as its closing delimiter.
18221828
var lookingForMultilineString = self
1823-
if lookingForMultilineString.advance(matching: #"""#), lookingForMultilineString.advance(matching: #"""#) {
1824-
if leadingDelimiterLength > 0 {
1825-
// If this is a string literal, check if we have the closing delimiter on the same line to correctly parse things like `#"""#` as a single line string containing a quote.
1826-
var isSingleLineString = lookingForMultilineString
1827-
1828-
if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
1829-
// If we have the correct number of delimiters now, we have something like `#"""#`.
1830-
// This is a single-line string.
1831-
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
1832-
}
1829+
if matchingOpening == .singleLine
1830+
|| !(lookingForMultilineString.advance(matching: #"""#) && lookingForMultilineString.advance(matching: #"""#))
1831+
{
1832+
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
1833+
}
18331834

1834-
// Scan ahead until the end of the line. Every time we see a closing
1835-
// quote, check if it is followed by the correct number of closing delimiters.
1836-
while isSingleLineString.is(notAt: "\r", "\n") {
1837-
if isSingleLineString.advance(if: { $0 == #"""# }) {
1838-
if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
1839-
return Lexer.Result(
1840-
.stringQuote,
1841-
stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine)
1842-
)
1843-
}
1844-
continue
1845-
}
1846-
_ = isSingleLineString.advance()
1847-
}
1835+
if leadingDelimiterLength > 0 {
1836+
// If this is a raw string literal, check if we have the closing delimiter on the same line to correctly parse things like `#"""#` as a single line string containing a quote.
1837+
var isSingleLineString = lookingForMultilineString
1838+
1839+
if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
1840+
// If we have the correct number of delimiters now, we have something like `#"""#`.
1841+
// This is a single-line string.
1842+
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
18481843
}
18491844

1850-
self = lookingForMultilineString
1851-
let trailingTriviaLexingMode: TriviaLexingMode?
1852-
if isOpening && self.is(at: "\n", "\r") {
1853-
// The opening quote of a multi-line string literal must be followed by
1854-
// a newline that's not part of the represented string.
1855-
trailingTriviaLexingMode = .escapedNewlineInMultiLineStringLiteral
1856-
} else {
1857-
trailingTriviaLexingMode = nil
1845+
// Scan ahead until the end of the line. Every time we see a closing
1846+
// quote, check if it is followed by the correct number of closing delimiters.
1847+
while isSingleLineString.is(notAt: "\r", "\n") {
1848+
if isSingleLineString.advance(if: { $0 == #"""# }) {
1849+
if isSingleLineString.advanceIfStringDelimiter(delimiterLength: leadingDelimiterLength) {
1850+
return Lexer.Result(
1851+
.stringQuote,
1852+
stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine)
1853+
)
1854+
}
1855+
continue
1856+
}
1857+
_ = isSingleLineString.advance()
18581858
}
1859+
}
18591860

1860-
return Lexer.Result(
1861-
.multilineStringQuote,
1862-
stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine),
1863-
trailingTriviaLexingMode: trailingTriviaLexingMode
1864-
)
1861+
self = lookingForMultilineString
1862+
let trailingTriviaLexingMode: TriviaLexingMode?
1863+
if matchingOpening == nil && self.is(at: "\n", "\r") {
1864+
// The opening quote of a multi-line string literal must be followed by
1865+
// a newline that's not part of the represented string.
1866+
trailingTriviaLexingMode = .escapedNewlineInMultiLineStringLiteral
18651867
} else {
1866-
return Lexer.Result(.stringQuote, stateTransition: stateTransitionAfterLexingStringQuote(kind: .singleLine))
1868+
trailingTriviaLexingMode = nil
18671869
}
1870+
1871+
return Lexer.Result(
1872+
.multilineStringQuote,
1873+
stateTransition: stateTransitionAfterLexingStringQuote(kind: .multiLine),
1874+
trailingTriviaLexingMode: trailingTriviaLexingMode
1875+
)
18681876
}
18691877

18701878
/// Returns `true` if the cursor is positioned at `\##(` with `delimiterLength`
@@ -1935,7 +1943,9 @@ extension Lexer.Cursor {
19351943
return Lexer.Result(
19361944
.stringSegment,
19371945
error: error,
1938-
stateTransition: .replace(newState: .afterStringLiteral(isRawString: delimiterLength > 0))
1946+
stateTransition: .replace(
1947+
newState: .afterStringLiteral(kind: stringLiteralKind, isRawString: delimiterLength > 0)
1948+
)
19391949
)
19401950
default:
19411951
break
@@ -1967,7 +1977,9 @@ extension Lexer.Cursor {
19671977
return Lexer.Result(
19681978
.stringSegment,
19691979
error: error,
1970-
stateTransition: .replace(newState: .afterStringLiteral(isRawString: delimiterLength > 0))
1980+
stateTransition: .replace(
1981+
newState: .afterStringLiteral(kind: stringLiteralKind, isRawString: delimiterLength > 0)
1982+
)
19711983
)
19721984
}
19731985
}

Tests/SwiftParserTest/ExpressionTests.swift

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1882,6 +1882,62 @@ final class ExpressionTests: ParserTestCase {
18821882
)
18831883
}
18841884

1885+
func testInvalidMultiLineClosingDelimiter() {
1886+
assertParse(
1887+
#"""
1888+
"a"1️⃣""2️⃣ a3️⃣ a4️⃣ℹ️"""5️⃣
1889+
"""#,
1890+
diagnostics: [
1891+
DiagnosticSpec(
1892+
locationMarker: "1️⃣",
1893+
message: "consecutive statements on a line must be separated by newline or ';'",
1894+
fixIts: [
1895+
"insert newline", "insert ';'",
1896+
]
1897+
),
1898+
DiagnosticSpec(
1899+
locationMarker: "2️⃣",
1900+
message: "consecutive statements on a line must be separated by newline or ';'",
1901+
fixIts: [
1902+
"insert newline", "insert ';'",
1903+
]
1904+
),
1905+
DiagnosticSpec(
1906+
locationMarker: "3️⃣",
1907+
message: "consecutive statements on a line must be separated by newline or ';'",
1908+
fixIts: [
1909+
"insert newline", "insert ';'",
1910+
]
1911+
),
1912+
DiagnosticSpec(
1913+
locationMarker: "4️⃣",
1914+
message: "consecutive statements on a line must be separated by newline or ';'",
1915+
fixIts: [
1916+
"insert newline", "insert ';'",
1917+
]
1918+
),
1919+
DiagnosticSpec(
1920+
locationMarker: "5️⃣",
1921+
message: #"expected '"""' to end string literal"#,
1922+
notes: [
1923+
NoteSpec(message: #"to match this opening '"""'"#)
1924+
],
1925+
fixIts: [
1926+
#"insert '"""'"#
1927+
]
1928+
),
1929+
],
1930+
fixedSource: #"""
1931+
"a"
1932+
""
1933+
a
1934+
a
1935+
"""
1936+
"""
1937+
"""#
1938+
)
1939+
}
1940+
18851941
func testEmptyLineInMultilineStringLiteral() {
18861942
assertParse(
18871943
#"""

0 commit comments

Comments
 (0)