Machinery for parse recovery

+8 -5
Sources/PterodactylBuild/Keys/Blob/GetLineMap.swift
···
import llbuild2fx
extension Keys.Blob {
-
struct GetLineMap: BuildKey {
-
let blobId: LLBDataID
+
public struct GetLineMap: BuildKey {
+
public let blobId: LLBDataID
+
public init(blobId: LLBDataID) {
+
self.blobId = blobId
+
}
-
typealias ValueType = PterodactylSyntax.LineMap
+
public typealias ValueType = PterodactylSyntax.LineMap
-
static let versionDependencies: [any FXVersioning.Type] = [ReadContents.self]
+
public static let versionDependencies: [any FXVersioning.Type] = [ReadContents.self]
-
func computeValue(_ ctx: BuildContext<Self>) async throws -> ValueType {
+
public func computeValue(_ ctx: BuildContext<Self>) async throws -> ValueType {
let code = try await ctx.request(ReadContents(blobId: blobId))
return PterodactylSyntax.LineMap(source: code)
}
+13 -10
Sources/PterodactylBuild/Keys/Blob/ParseDocument.swift
···
import llbuild2fx
extension Keys.Blob {
-
struct ParseDocument: BuildKey {
-
let blobId: LLBDataID
-
-
struct ValueType: Codable, FXValue {
-
let tree: PterodactylSyntax.SyntaxTree
-
let diagnostics: [Diagnostic]
+
public struct ParseDocument: BuildKey {
+
public let blobId: LLBDataID
+
public init(blobId: LLBDataID) {
+
self.blobId = blobId
+
}
+
+
public struct ValueType: Codable, FXValue {
+
public let tree: PterodactylSyntax.SyntaxTree
+
public let diagnostics: [Diagnostic]
}
-
static let versionDependencies: [any FXVersioning.Type] = [ReadContents.self, Tokenise.self]
+
public static let versionDependencies: [any FXVersioning.Type] = [ReadContents.self, Tokenise.self]
-
func computeValue(_ ctx: BuildContext<Self>) async throws -> ValueType {
+
public func computeValue(_ ctx: BuildContext<Self>) async throws -> ValueType {
let code = try await ctx.request(ReadContents(blobId: blobId))
let tokens = try await ctx.request(Tokenise(blobId: blobId))
var parser = Parser(source: code, tokens: tokens)
-
PterodactylSyntax.Document.parse(&parser)
-
return ValueType(tree: parser.tree, diagnostics: parser.diagnostics)
+
PterodactylSyntax.Document.parse(&parser, recovery: [])
+
return ValueType(tree: parser.builder.tree, diagnostics: parser.diagnostics)
}
}
}
+17 -17
Sources/PterodactylSyntax/Grammar.swift
···
static var kinds: [SyntaxTreeKind] { get }
/// Indicates whether the current parser state is consistent with the grammatical production starting here. When a given grammatical element is optional, this can be used to avoid backtracking. This is a *precondition* for parsing.
-
static func before(_ parser: inout Parser) -> Bool
+
static func precondition(_ parser: inout Parser) -> Bool
-
/// Parse the grammatical production, assuming the precondition indicated by ``before(_:)``. This function should not be called outside this module (instead, use ``parse(_:)`` and ``tryParse(_:)``.
-
static func inside(_ parser: inout Parser) -> ParseResult
+
/// Parse the grammatical production, assuming the precondition indicated by ``precondition(_:)``. This function should not be called outside this module; instead, use ``parse(_:recovery:)`` and ``tryParse(_:recovery:)``.
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult
+
}
+
+
extension Grammar {
    /// Parses this production only when the parser is not at end of file and
    /// ``precondition(_:)`` holds at the current position.
    ///
    /// - Parameters:
    ///   - parser: The parser to advance.
    ///   - recovery: Token kinds forwarded unchanged to ``parse(_:recovery:)``.
    /// - Returns: `true` when the production was parsed, `false` when nothing was attempted.
    public static func tryParse(_ parser: inout Parser, recovery: Set<TokenKind>) -> Bool {
        // Check end-of-file first so the precondition is never consulted past the last token.
        if parser.isEndOfFile { return false }
        if !precondition(&parser) { return false }
        parse(&parser, recovery: recovery)
        return true
    }

    /// Parses this production unconditionally, wrapping everything that
    /// ``inside(_:recovery:)`` emits in a single tree node.
    ///
    /// The node is opened before `inside` runs and is closed afterwards with the
    /// kind and metadata reported in the returned ``ParseResult``.
    public static func parse(_ parser: inout Parser, recovery: Set<TokenKind>) {
        let opened = parser.builder.open()
        let outcome = inside(&parser, recovery: recovery)
        parser.builder.close(mark: opened, kind: outcome.kind, metadata: outcome.metadata)
    }
}
public struct ParseResult {
···
}
}
-
-
extension Grammar {
-
public static func tryParse(_ parser: inout Parser) -> Bool {
-
guard !parser.isEndOfFile && before(&parser) else { return false }
-
parse(&parser)
-
return true
-
}
-
-
public static func parse(_ parser: inout Parser) {
-
let mark = parser.open()
-
let result = inside(&parser)
-
parser.close(mark: mark, kind: result.kind, metadata: result.metadata)
-
}
-
}
+5 -5
Sources/PterodactylSyntax/Grammar/Document.swift
···
public static let kind = SyntaxTreeKind(name: "document")
public static let kinds = [kind]
-
public static func before(_ parser: inout Parser) -> Bool {
+
public static func precondition(_ parser: inout Parser) -> Bool {
true
}
-
public static func inside(_ parser: inout Parser) -> ParseResult {
+
public static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
parser.eatTrivia()
// Parse imports
while !parser.isAt(kind: .eof) {
parser.eatTrivia()
-
if Theory.before(&parser) { break }
+
if Theory.precondition(&parser) { break }
-
if !Import.tryParse(&parser) {
+
if !Import.tryParse(&parser, recovery: recovery) {
parser.advance(error: "Expected to see either an import or a theory declaration, but instead got \(parser.currentToken.kind): \(parser.currentToken.text)")
}
}
// Theories section
while !parser.isAt(kind: .eof) {
-
if !Theory.tryParse(&parser) {
+
if !Theory.tryParse(&parser, recovery: recovery) {
if parser.isAt(kindSatisfying: \.isVisible) {
let token = parser.currentToken
parser.advance(error: "Unexpected token: \(token.kind)")
+7 -7
Sources/PterodactylSyntax/Grammar/Document/Import.swift
···
static let kind = SyntaxTreeKind(name: "import.name")
static let kinds = [kind]
-
static func before(_ parser: inout Parser) -> Bool {
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .identifier)
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
parser.expect(kind: .identifier, metadata: TokenMetadata(semanticTokenType: .namespace))
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
parser.expect(kind: .identifier, metadata: TokenMetadata(semanticTokenType: .namespace), recovery: recovery)
return ParseResult(kind: Self.kind)
}
}
···
static let kind = SyntaxTreeKind(name: "import")
static let kinds = [kind]
-
static func before(_ parser: inout Parser) -> Bool {
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .keyword(.import))
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
parser.expect(kind: .keyword(.import), metadata: TokenMetadata(semanticTokenType: .keyword))
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
parser.expect(kind: .keyword(.import), metadata: TokenMetadata(semanticTokenType: .keyword), recovery: recovery)
parser.eatTrivia()
-
ImportName.parse(&parser)
+
ImportName.parse(&parser, recovery: recovery)
return ParseResult(kind: Self.kind)
}
}
+6 -6
Sources/PterodactylSyntax/Grammar/Document/Theory.swift
···
enum Theory: Grammar {
static let kind = SyntaxTreeKind(name: "theory")
static let kinds = [kind]
-
-
static func before(_ parser: inout Parser) -> Bool {
+
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .keyword(.theory))
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
parser.expect(kind: .keyword(.theory), metadata: TokenMetadata(semanticTokenType: .keyword))
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
parser.expect(kind: .keyword(.theory), metadata: TokenMetadata(semanticTokenType: .keyword), recovery: recovery)
parser.eatTrivia()
-
if !TheoryName.tryParse(&parser) {
+
if !TheoryName.tryParse(&parser, recovery: recovery) {
parser.advance(error: "Expected theory name")
}
parser.eatTrivia()
-
TheoryBlock.parse(&parser)
+
TheoryBlock.parse(&parser, recovery: recovery)
return ParseResult(kind: Self.kind)
}
+6 -5
Sources/PterodactylSyntax/Grammar/Document/Theory/Declaration.swift
···
static let kinds = [Kinds.claim, Kinds.refine, Kinds.define]
-
static func before(_ parser: inout Parser) -> Bool {
-
Lhs.before(&parser)
+
static func precondition(_ parser: inout Parser) -> Bool {
+
Lhs.precondition(&parser)
}
static let punctuationMap: [Punctuation: SyntaxTreeKind] = [
···
.doubleRightArrow: Kinds.define
]
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
Lhs.parse(&parser)
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
let punctuations = punctuationMap.keys.map { TokenKind.punctuation($0) }
+
Lhs.parse(&parser, recovery: recovery.union(punctuations))
parser.eatTrivia()
var kind: SyntaxTreeKind = .error
···
}
parser.eatTrivia()
-
Rhs.parse(&parser)
+
Rhs.parse(&parser, recovery: recovery)
return ParseResult(kind: kind)
}
+3 -3
Sources/PterodactylSyntax/Grammar/Document/Theory/Declaration/Lhs.swift
···
static let kind = SyntaxTreeKind(name: "declaration.lhs")
static let kinds = [kind]
-
static func before(_ parser: inout Parser) -> Bool {
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .identifier)
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
parser.expect(kind: .identifier, metadata: TokenMetadata(semanticTokenType: .method))
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
parser.expect(kind: .identifier, metadata: TokenMetadata(semanticTokenType: .method), recovery: recovery)
return ParseResult(kind: Self.kind)
}
}
+3 -3
Sources/PterodactylSyntax/Grammar/Document/Theory/Declaration/Rhs.swift
···
/// Tree kind for the right-hand side of a declaration. It needs its own name so
/// that right-hand-side nodes are distinguishable from the "declaration.lhs"
/// nodes produced for the left-hand side.
static let kind = SyntaxTreeKind(name: "declaration.rhs")
static let kinds = [kind]
-
static func before(_ parser: inout Parser) -> Bool {
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .identifier)
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
parser.expect(kind: .identifier, metadata: TokenMetadata(semanticTokenType: .method))
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
parser.expect(kind: .identifier, metadata: TokenMetadata(semanticTokenType: .method), recovery: recovery)
return ParseResult(kind: Self.kind)
}
}
+4 -8
Sources/PterodactylSyntax/Grammar/Document/Theory/TheoryBlock.swift
···
static let kind = SyntaxTreeKind(name: "theory.block")
static let kinds = [kind]
-
static func before(_ parser: inout Parser) -> Bool {
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .keyword(.where))
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
-
parser.expect(kind: .keyword(.where), metadata: TokenMetadata(semanticTokenType: .keyword))
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
+
parser.expect(kind: .keyword(.where), metadata: TokenMetadata(semanticTokenType: .keyword), recovery: recovery.union([.blockBegin]))
-
parser.eatTrivia()
if parser.eat(kind: .blockBegin, metadata: nil) {
parser.eatTrivia()
-
-
while Declaration.tryParse(&parser) {
+
while Declaration.tryParse(&parser, recovery: recovery.union([.blockSep, .blockEnd])) {
parser.eatTrivia()
if parser.eat(kind: .blockSep, metadata: nil) {
-
parser.eatTrivia()
continue
} else {
break
}
}
-
_ = parser.eat(kind: .blockEnd, metadata: nil)
}
+2 -2
Sources/PterodactylSyntax/Grammar/Document/Theory/TheoryName.swift
···
static let kind = SyntaxTreeKind(name: "theory.name")
static let kinds = [kind]
-
static func before(_ parser: inout Parser) -> Bool {
+
static func precondition(_ parser: inout Parser) -> Bool {
parser.isAt(kind: .identifier)
}
-
static func inside(_ parser: inout Parser) -> ParseResult {
+
static func inside(_ parser: inout Parser, recovery: Set<TokenKind>) -> ParseResult {
parser.advance(metadata: TokenMetadata(semanticTokenType: .interface))
return ParseResult(kind: Self.kind)
}
+60 -82
Sources/PterodactylSyntax/Parser.swift
···
// SPDX-License-Identifier: MPL-2.0
public struct Parser {
-
enum Event: Equatable {
-
case open(kind: SyntaxTreeKind, metadata: SyntaxTreeMetadata?)
-
case close
-
case advance(metadata: TokenMetadata?)
-
}
-
public struct MarkOpened {
internal let index: Int
}
···
}
public private(set) var diagnostics: [Diagnostic] = []
+
public var builder: SyntaxTreeBuilder = SyntaxTreeBuilder()
-
private var fuel: Int = 0
+
private var inError: Bool = false
private var position: Int = 0
-
private var events: [Event] = []
private var absoluteUtf16Offset: Int = 0
public var absoluteRangeAtCursor: Range<Int> {
return absoluteUtf16Offset..<absoluteUtf16Offset
···
}
-
public mutating func open() -> MarkOpened {
-
let mark = MarkOpened(index: events.count)
-
events.append(.open(kind: .error, metadata: nil))
-
return mark
-
}
-
-
public mutating func close(mark: MarkOpened, kind: SyntaxTreeKind, metadata: SyntaxTreeMetadata?) {
-
events[mark.index] = .open(kind: kind, metadata: metadata)
-
events.append(.close)
-
}
-
public mutating func advance(metadata: TokenMetadata?) {
precondition(!isEndOfFile)
-
events.append(.advance(metadata: metadata))
+
builder.advance(token: currentToken, metadata: metadata)
absoluteUtf16Offset += currentToken.utf16Length
position += 1
-
fuel = 256
}
-
public mutating func advance(error: String?, metadata: TokenMetadata? = nil) {
-
let mark = open()
-
if let error {
-
let diagnostic = Diagnostic(
-
message: error,
-
absoluteRange: absoluteRangeOfCurrentToken
-
)
+
public mutating func advance(error: String, metadata: TokenMetadata? = nil) {
+
let mark = builder.open()
+
let diagnostic = Diagnostic(
+
message: error,
+
absoluteRange: absoluteRangeOfCurrentToken
+
)
-
diagnostics.append(diagnostic)
-
}
+
diagnostics.append(diagnostic)
advance(metadata: metadata)
-
close(mark: mark, kind: .error, metadata: nil)
-
}
-
-
public mutating func lookahead(_ k: Int) -> TokenKind? {
-
precondition(fuel > 0, "Parser is stuck!")
-
fuel -= 1
-
let index = position + k
-
guard tokens.indices.contains(index) else { return nil }
-
return tokens[index].kind
+
builder.close(mark: mark, kind: .error, metadata: nil)
}
-
public mutating func eat(kindSatisfying predicate: (TokenKind) -> Bool, metadata: TokenMetadata?) -> Bool {
-
guard !isEndOfFile && isAt(kindSatisfying: predicate) else { return false }
+
/// Consumes the current token when its kind equals `kind`.
///
/// - Parameters:
///   - kind: The token kind to match against the current token.
///   - metadata: Metadata recorded with the token if it is consumed.
/// - Returns: `true` when a token was consumed; `false` at end of file or on a kind mismatch.
public mutating func eat(kind: TokenKind, metadata: TokenMetadata?) -> Bool {
    if isEndOfFile { return false }
    let matches = isAt(kindSatisfying: { candidate in candidate == kind })
    if !matches { return false }
    advance(metadata: metadata)
    return true
}
-
public mutating func eat(kind: TokenKind, metadata: TokenMetadata?) -> Bool {
-
eat(kindSatisfying: { $0 == kind }, metadata: metadata)
+
/// Tells the caller of `ate(kind:metadata:)` how to proceed: `.break` means the
/// token was consumed and the caller can stop, `.continue` means it was not.
enum ControlFlow {
    case `continue`, `break`
}
-
public mutating func expect(kind: TokenKind, metadata: TokenMetadata?, error: String? = nil) {
-
if eat(kind: kind, metadata: metadata) { return }
-
let diagnostic = Diagnostic(
-
message: error ?? "Expected \(kind) but got \(currentToken.kind): `\(currentToken.text)`",
-
absoluteRange: absoluteRangeAtCursor
-
)
-
diagnostics.append(diagnostic)
+
/// Tries to consume a token of the given kind.
///
/// On success the parser leaves its error state (so later failures are reported
/// again) and any trivia that follows is consumed.
///
/// - Returns: `.break` when the token was consumed, `.continue` otherwise.
mutating func ate(kind: TokenKind, metadata: TokenMetadata?) -> ControlFlow {
    if eat(kind: kind, metadata: metadata) {
        inError = false
        eatTrivia()
        return .break
    }
    return .continue
}
-
public var tree: SyntaxTree {
-
var events = events
-
var stack: [SyntaxTree.Builder] = []
-
var cursor: Int = 0
+
/// Skips tokens until the current token's kind is one of `anchors` or the end of
/// file is reached, reporting at most one diagnostic per error episode.
///
/// Skipped tokens are wrapped in a single `.error` tree node. The node is opened
/// before the first token is discarded, because `advance(metadata:)` pushes each
/// token into the builder immediately; opening it afterwards would produce an
/// empty error node and leave the skipped tokens in the enclosing node.
///
/// - Parameters:
///   - anchors: Token kinds at which skipping stops.
///   - expected: The token kind the caller was looking for; used in the default message.
///   - error: Optional replacement for the default diagnostic message.
mutating func recoverUntil(_ anchors: Set<TokenKind>, expected: TokenKind, error: String? = nil) {
    var discardTokens: [Token] = []
    let startOffset = absoluteUtf16Offset
    // Qualified explicitly: `Parser` declares its own `MarkOpened` type as well.
    var errorMark: SyntaxTreeBuilder.MarkOpened? = nil

    // Test end-of-file first, as `eat(kind:metadata:)` does, so the current
    // token is never inspected past the end of the stream.
    while !isEndOfFile && !isAt(kindSatisfying: { anchors.contains($0) }) {
        if errorMark == nil {
            errorMark = builder.open()
        }
        let token = currentToken
        advance(metadata: nil)
        discardTokens.append(token)
    }

    if let errorMark {
        builder.close(mark: errorMark, kind: .error, metadata: nil)
    }

    // While `inError` is set, an earlier failure has already been reported and
    // no token has been matched since, so further diagnostics are suppressed.
    guard !inError else { return }
    inError = true

    let message = error ?? "Expected \(expected) but got \(discardTokens)"
    let endOffset = discardTokens.reduce(startOffset) { $0 + $1.utf16Length }
    let range = discardTokens.isEmpty ? absoluteRangeAtCursor : startOffset..<endOffset
    diagnostics.append(Diagnostic(message: message, absoluteRange: range))
}
-
assert(stack.count == 1)
-
return stack.popLast()!.tree
+
/// Requires a token of the given kind at the current position, recovering if it is absent.
///
/// When the token is not present, tokens are skipped via `recoverUntil` up to the
/// first token whose kind is `kind` or one of `recovery`, and then one further
/// attempt is made to consume `kind`.
///
/// - Parameters:
///   - kind: The token kind that must appear.
///   - metadata: Metadata recorded with the token when it is consumed.
///   - recovery: Token kinds at which skipping must stop.
///   - error: Optional replacement for the default diagnostic message.
public mutating func expect(kind: TokenKind, metadata: TokenMetadata?, recovery: Set<TokenKind>, error: String? = nil) {
    switch ate(kind: kind, metadata: metadata) {
    case .break:
        return
    case .continue:
        break
    }

    // The expected kind is itself a stopping point, so it can still be consumed below.
    let anchors = recovery.union([kind])
    recoverUntil(anchors, expected: kind, error: error)
    _ = ate(kind: kind, metadata: metadata)
}
+
}
+
extension Parser {
mutating func eatTrivium() -> Bool {
switch currentToken.kind {
case .whitespace:
···
mutating func eatTrivia() {
while !isEndOfFile && eatTrivium() {}
-
}
-
-
}
-
-
extension Array {
-
fileprivate mutating func modifyLast(_ modifier: (inout Element) -> Void) {
-
if var last = popLast() {
-
modifier(&last)
-
append(last)
-
}
}
}
+1 -1
Sources/PterodactylSyntax/SyntaxTree.swift
···
extension SyntaxTree {
/// A mutable version of ``SyntaxTree`` that does not keep track of textual length, for use when constructing trees.
-
public struct Builder {
+
public struct MutableTree {
public var kind: SyntaxTreeKind
public var metadata: SyntaxTreeMetadata?
public var children: [Child]
+70
Sources/PterodactylSyntax/SyntaxTreeBuilder.swift
···
+
// SPDX-FileCopyrightText: 2025 The Project Pterodactyl Developers
+
//
+
// SPDX-License-Identifier: MPL-2.0
+
+
import Foundation
+
+
/// Accumulates a flat list of open / close / advance events and replays them into a ``SyntaxTree``.
public struct SyntaxTreeBuilder {
    private enum Event: Equatable {
        case open(kind: SyntaxTreeKind, metadata: SyntaxTreeMetadata?)
        case close
        case advance(token: Token, metadata: TokenMetadata?)
    }

    /// Handle to an `open` event, returned by ``open()`` and consumed by ``close(mark:kind:metadata:)``.
    public struct MarkOpened {
        internal let index: Int
    }

    private var events: [Event] = []

    /// Records `token` as a child of the innermost node that is currently open.
    public mutating func advance(token: Token, metadata: TokenMetadata?) {
        events.append(.advance(token: token, metadata: metadata))
    }

    /// Opens a new node. The node is recorded with kind `.error` and no metadata
    /// until it is closed, at which point both are overwritten.
    public mutating func open() -> MarkOpened {
        let mark = MarkOpened(index: events.count)
        events.append(.open(kind: .error, metadata: nil))
        return mark
    }

    /// Closes the node opened at `mark`, assigning its final kind and metadata.
    public mutating func close(mark: MarkOpened, kind: SyntaxTreeKind, metadata: SyntaxTreeMetadata?) {
        events[mark.index] = .open(kind: kind, metadata: metadata)
        events.append(.close)
    }

    /// Replays the recorded events into a tree.
    ///
    /// The event list must end with the `close` of the root node. That final event
    /// is dropped so the root stays on the stack and can be returned.
    public var tree: SyntaxTree {
        var pending = events

        // Pop outside the precondition: its condition is an autoclosure that is
        // not evaluated in -Ounchecked builds, and the pop must always happen.
        let trailing = pending.popLast()
        precondition(trailing == .close, "SyntaxTreeBuilder: the event list must end by closing the root node")

        var stack: [SyntaxTree.MutableTree] = []
        for event in pending {
            switch event {
            case let .open(kind, metadata):
                stack.append(SyntaxTree.MutableTree(kind: kind, metadata: metadata, children: []))
            case .close:
                let finished = stack.popLast()!
                stack.modifyLast { parent in
                    parent.children.append(.tree(finished.tree))
                }
            case let .advance(token, metadata):
                stack.modifyLast { parent in
                    parent.children.append(.token(token, metadata: metadata))
                }
            }
        }

        assert(stack.count == 1)
        return stack.popLast()!.tree
    }
}
+
+
+
extension Array {
    /// Applies `modifier` to the last element of the array; does nothing when the array is empty.
    fileprivate mutating func modifyLast(_ modifier: (inout Element) -> Void) {
        guard let lastIndex = indices.last else { return }
        modifier(&self[lastIndex])
    }
}
+1 -1
Sources/PterodactylSyntax/Token.swift
···
import Foundation
-
public struct Token: Codable {
+
public struct Token: Codable, Equatable {
public let kind: TokenKind
public let text: String
public let utf16Length: Int
+1 -1
Sources/PterodactylSyntax/Types.swift
···
case equal = "="
}
-
public enum TokenKind: Codable, Equatable, Sendable {
+
public enum TokenKind: Codable, Equatable, Sendable, Hashable {
case eof
case keyword(Keyword)
case punctuation(Punctuation)