package parser

import (
	"bytes"
	_ "embed"
	"encoding/json"
	"fmt"
	"go/ast"
	"go/format"
	"go/parser"
	"go/token"
	goToken "go/token" // second, aliased import of go/token; both names are used in this file
	"io"
	"strconv"
	"strings"
	"text/template"

	"urubu/driver/lexer"
	spec "urubu/spec/grammar"
)

// Grammar is the view of a compiled grammar the parser needs: the LR parsing
// tables (ACTION/GOTO), per-symbol metadata, and AST action entries.
type Grammar interface {
	// InitialState returns the initial state of a parser.
	InitialState() int

	// StartProduction returns the start production of grammar.
	StartProduction() int

	// Action returns an ACTION entry corresponding to a (state, terminal symbol) pair.
	Action(state int, terminal int) int

	// GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair.
	GoTo(state int, lhs int) int

	// ErrorTrapperState returns true when a state can shift the error symbol.
	ErrorTrapperState(state int) bool

	// LHS returns a LHS symbol of a production.
	LHS(prod int) int

	// AlternativeSymbolCount returns a symbol count of p production.
	AlternativeSymbolCount(prod int) int

	// RecoverProduction returns true when a production has the recover directive.
	RecoverProduction(prod int) bool

	// NonTerminal returns a string representation of a non-terminal symbol.
	NonTerminal(nonTerminal int) string

	// TerminalCount returns a terminal symbol count of grammar.
	TerminalCount() int

	// SkipTerminal returns true when a terminal symbol must be skipped on syntax analysis.
	SkipTerminal(terminal int) bool

	// EOF returns the EOF symbol.
	EOF() int

	// Error returns the error symbol.
	Error() int

	// Terminal returns a string representation of a terminal symbol.
	Terminal(terminal int) string

	// ASTAction returns an AST action entries.
	ASTAction(prod int) []int
}

// VToken is a single token produced by the lexer, as seen by the parser.
type VToken interface {
	// TerminalID returns a terminal ID.
	TerminalID() int

	// Lexeme returns a lexeme.
	Lexeme() []byte

	// EOF returns true when a token represents EOF.
	EOF() bool

	// Invalid returns true when a token is invalid.
	Invalid() bool

	// BytePosition returns (position, length) pair.
	// `position` is a byte position where a token appears and `length` is a length in bytes.
	BytePosition() (int, int)

	// Position returns (row, column) pair.
	Position() (int, int)
}

// TokenStream supplies tokens to the parser one at a time.
type TokenStream interface {
	Next() (VToken, error)
}

// SyntaxError describes one syntax error the parser detected and recorded.
type SyntaxError struct {
	Row               int
	Col               int
	Message           string
	Token             VToken   // the offending token
	ExpectedTerminals []string // terminals acceptable at the error point
}

// ParserOption configures a Parser at construction time.
type ParserOption func(p *Parser) error

// DisableLAC disables LAC (lookahead correction). LAC is enabled by default.
func DisableLAC() ParserOption {
	return func(p *Parser) error {
		p.disableLAC = true
		return nil
	}
}

// SemanticAction sets a semantic action set the parser invokes on shift,
// reduce, accept, and error-handling events.
func SemanticAction(semAct SemanticActionSet) ParserOption {
	return func(p *Parser) error {
		p.semAct = semAct
		return nil
	}
}

// Parser is an LR parser driver operating on the tables of a Grammar.
type Parser struct {
	toks       TokenStream
	gram       Grammar
	stateStack *stateStack
	semAct     SemanticActionSet
	disableLAC bool
	onError    bool // true while recovering from a trapped syntax error
	shiftCount int  // shifts performed since the last error was trapped
	synErrs    []*SyntaxError
}

// NewParser returns a new Parser reading tokens from toks and driving them
// through the parsing tables of gram. Options are applied in order; the first
// option error aborts construction.
func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) {
	p := &Parser{
		toks:       toks,
		gram:       gram,
		stateStack: &stateStack{},
	}
	for _, opt := range opts {
		err := opt(p)
		if err != nil {
			return nil, err
		}
	}
	return p, nil
}

// Parse runs the LR parsing loop until the input is accepted or an
// unrecoverable error occurs. Syntax errors that are trapped via the error
// symbol are recorded (see SyntaxErrors) and parsing continues; Parse returns
// a non-nil error only for failures of the token stream itself.
func (p *Parser) Parse() error {
	p.stateStack.push(p.gram.InitialState())
	tok, err := p.nextToken()
	if err != nil {
		return err
	}

ACTION_LOOP:
	for {
		act := p.lookupAction(tok)

		switch {
		case act < 0: // Shift
			nextState := act * -1

			recovered := false
			if p.onError {
				p.shiftCount++

				// When the parser performs shift three times, the parser recovers from the error state.
				if p.shiftCount >= 3 {
					p.onError = false
					p.shiftCount = 0
					recovered = true
				}
			}

			p.shift(nextState)

			if p.semAct != nil {
				p.semAct.Shift(tok, recovered)
			}

			tok, err = p.nextToken()
			if err != nil {
				return err
			}
		case act > 0: // Reduce
			prodNum := act

			recovered := false
			if p.onError && p.gram.RecoverProduction(prodNum) {
				p.onError = false
				p.shiftCount = 0
				recovered = true
			}

			accepted := p.reduce(prodNum)
			if accepted {
				if p.semAct != nil {
					p.semAct.Accept()
				}

				return nil
			}

			if p.semAct != nil {
				p.semAct.Reduce(prodNum, recovered)
			}
		default: // Error
			if p.onError {
				// Already recovering: discard tokens until one is shiftable again.
				tok, err = p.nextToken()
				if err != nil {
					return err
				}
				if tok.EOF() {
					if p.semAct != nil {
						p.semAct.MissError(tok)
					}

					return nil
				}

				continue ACTION_LOOP
			}

			row, col := tok.Position()
			p.synErrs = append(p.synErrs, &SyntaxError{
				Row:               row,
				Col:               col,
				Message:           "unexpected token",
				Token:             tok,
				ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
			})

			count, ok := p.trapError()
			if !ok {
				if p.semAct != nil {
					p.semAct.MissError(tok)
				}

				return nil
			}

			p.onError = true
			p.shiftCount = 0

			act, err := p.lookupActionOnError()
			if err != nil {
				return err
			}

			p.shift(act * -1)

			if p.semAct != nil {
				p.semAct.TrapAndShiftError(tok, count)
			}
		}
	}
}

// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid,
// this method returns `true`.
func (p *Parser) validateLookahead(term int) bool {
	// LAC (lookahead correction): simulate parse actions on a scratch copy of
	// the state stack without mutating the real one.
	p.stateStack.enableExploratoryMode()
	defer p.stateStack.disableExploratoryMode()

	for {
		act := p.gram.Action(p.stateStack.topExploratorily(), term)

		switch {
		case act < 0: // Shift
			return true
		case act > 0: // Reduce
			prodNum := act

			lhs := p.gram.LHS(prodNum)
			if lhs == p.gram.LHS(p.gram.StartProduction()) {
				return true
			}
			n := p.gram.AlternativeSymbolCount(prodNum)
			p.stateStack.popExploratorily(n)
			state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs)
			p.stateStack.pushExploratorily(state)
		default: // Error
			return false
		}
	}
}

// nextToken returns the next non-skippable token from the stream.
func (p *Parser) nextToken() (VToken, error) {
	for {
		// We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
		// and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
		// a syntax error because the parser cannot find an entry corresponding to the invalid token.
		tok, err := p.toks.Next()
		if err != nil {
			return nil, err
		}

		if p.gram.SkipTerminal(tok.TerminalID()) {
			continue
		}

		return tok, nil
	}
}

// tokenToTerminal maps a token to its terminal symbol, folding EOF tokens
// into the grammar's EOF symbol.
func (p *Parser) tokenToTerminal(tok VToken) int {
	if tok.EOF() {
		return p.gram.EOF()
	}

	return tok.TerminalID()
}

// lookupAction returns the ACTION entry for the current state and `tok`.
// With LAC enabled, an entry is reported as error (0) unless the lookahead
// is provably viable.
func (p *Parser) lookupAction(tok VToken) int {
	if !p.disableLAC {
		term := p.tokenToTerminal(tok)
		if !p.validateLookahead(term) {
			return 0
		}
	}

	return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok))
}

// lookupActionOnError returns the shift action on the error symbol for the
// current state. The caller must have positioned the stack on an
// error-trapper state; any non-shift entry is reported as an error.
func (p *Parser) lookupActionOnError() (int, error) {
	act := p.gram.Action(p.stateStack.top(), p.gram.Error())
	if act >= 0 {
		return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error()))
	}

	return act, nil
}

// shift pushes the next state onto the state stack.
func (p *Parser) shift(nextState int) {
	p.stateStack.push(nextState)
}

// reduce applies production `prodNum` to the state stack. It returns true
// when the reduction is by the start production, i.e. the input is accepted.
func (p *Parser) reduce(prodNum int) bool {
	lhs := p.gram.LHS(prodNum)
	if lhs == p.gram.LHS(p.gram.StartProduction()) {
		return true
	}
	n := p.gram.AlternativeSymbolCount(prodNum)
	p.stateStack.pop(n)
	nextState := p.gram.GoTo(p.stateStack.top(), lhs)
	p.stateStack.push(nextState)
	return false
}

// trapError pops states until one that can shift the error symbol is on top.
// It returns the number of popped states and true on success, or (0, false)
// when the stack bottoms out at the initial state without finding one.
func (p *Parser) trapError() (int, bool) {
	count := 0
	for {
		if p.gram.ErrorTrapperState(p.stateStack.top()) {
			return count, true
		}

		if p.stateStack.top() != p.gram.InitialState() {
			p.stateStack.pop(1)
			count++
		} else {
			return 0, false
		}
	}
}

// SyntaxErrors returns the syntax errors recorded during Parse.
func (p *Parser) SyntaxErrors() []*SyntaxError {
	return p.synErrs
}

// searchLookahead collects the names of terminals acceptable in `state`,
// used to populate SyntaxError.ExpectedTerminals.
func (p *Parser) searchLookahead(state int) []string {
	kinds := []string{}
	termCount := p.gram.TerminalCount()
	for term := 0; term < termCount; term++ {
		if p.disableLAC {
			if p.gram.Action(p.stateStack.top(), term) == 0 {
				continue
			}
		} else {
			if !p.validateLookahead(term) {
				continue
			}
		}

		// We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol
		// intentionally.
		if term == p.gram.Error() {
			continue
		}

		kinds = append(kinds, p.gram.Terminal(term))
	}

	return kinds
}

// stateStack is the parser's LR state stack. `itemsExp` is a scratch copy
// used by LAC to explore actions without committing them (see
// enableExploratoryMode).
type stateStack struct {
	items    []int
	itemsExp []int
}

// enableExploratoryMode snapshots the real stack into the exploratory one.
func (s *stateStack) enableExploratoryMode() {
	s.itemsExp = make([]int, len(s.items))
	copy(s.itemsExp, s.items)
}

// disableExploratoryMode discards the exploratory stack.
func (s *stateStack) disableExploratoryMode() {
	s.itemsExp = nil
}

func (s *stateStack) top() int {
	return s.items[len(s.items)-1]
}

func (s *stateStack) topExploratorily() int {
	return s.itemsExp[len(s.itemsExp)-1]
}

func (s *stateStack) push(state int) {
	s.items = append(s.items, state)
}

func (s *stateStack) pushExploratorily(state int) {
	s.itemsExp = append(s.itemsExp, state)
}

func (s *stateStack) pop(n int) {
	s.items = s.items[:len(s.items)-n]
}

func (s *stateStack) popExploratorily(n int) {
	s.itemsExp = s.itemsExp[:len(s.itemsExp)-n]
}

// SemanticActionSet is a set of semantic actions a parser calls.
type SemanticActionSet interface {
	// Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol.
	// When the parser recovered from an error state by shifting the token, `recovered` is true.
	Shift(tok VToken, recovered bool)

	// Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production.
	// When the parser recovered from an error state by reducing the production, `recovered` is true.
	Reduce(prodNum int, recovered bool)

	// Accept runs when the parser accepts an input.
	Accept()

	// TrapAndShiftError runs when the parser traps a syntax error and shifts an error symbol onto the state stack.
	// `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards
	// from the state stack.
	// Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
	// corresponding to the error symbol doesn't exist.
	TrapAndShiftError(cause VToken, popped int)

	// MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error.
	MissError(cause VToken)
}

var _ SemanticActionSet = &SyntaxTreeActionSet{}

// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface.
type SyntaxTreeNode interface {
	// ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast`
	// directive with `...` operator.
	ChildCount() int

	// ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast`
	// directive with `...` operator.
	ExpandChildren() []SyntaxTreeNode
}

var _ SyntaxTreeNode = &Node{}

// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types.
// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface.
type SyntaxTreeBuilder interface { Shift(kindName string, tok VToken) SyntaxTreeNode ShiftError(kindName string) SyntaxTreeNode Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode Accept(f SyntaxTreeNode) } var _ SyntaxTreeBuilder = &DefaultSyntaxTreeBuilder{} // DefaultSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder. type DefaultSyntaxTreeBuilder struct { tree *Node } // NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder. func NewDefaultSyntaxTreeBuilder() *DefaultSyntaxTreeBuilder { return &DefaultSyntaxTreeBuilder{} } // Shift is a implementation of SyntaxTreeBuilder.Shift. func (b *DefaultSyntaxTreeBuilder) Shift(kindName string, tok VToken) SyntaxTreeNode { bytePos, byteLen := tok.BytePosition() row, col := tok.Position() return &Node{ Type: NodeTypeTerminal, KindName: kindName, Text: string(tok.Lexeme()), BytePos: bytePos, ByteLen: byteLen, Row: row, Col: col, } } // ShiftError is a implementation of SyntaxTreeBuilder.ShiftError. func (b *DefaultSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode { return &Node{ Type: NodeTypeError, KindName: kindName, } } // Reduce is a implementation of SyntaxTreeBuilder.Reduce. func (b *DefaultSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode { cNodes := make([]*Node, len(children)) for i, c := range children { cNodes[i] = c.(*Node) } return &Node{ Type: NodeTypeNonTerminal, KindName: kindName, Children: cNodes, } } // Accept is a implementation of SyntaxTreeBuilder.Accept. func (b *DefaultSyntaxTreeBuilder) Accept(f SyntaxTreeNode) { b.tree = f.(*Node) } // Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil. func (b *DefaultSyntaxTreeBuilder) Tree() *Node { return b.tree } // SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree. 
type SyntaxTreeActionSet struct {
	gram             Grammar
	builder          SyntaxTreeBuilder
	semStack         *semanticStack
	disableASTAction bool // when true, `#ast` directives are ignored (CST mode)
}

// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree).
// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them.
func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
	return &SyntaxTreeActionSet{
		gram:     gram,
		builder:  builder,
		semStack: newSemanticStack(),
	}
}

// NewCSTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree).
// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them.
func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
	return &SyntaxTreeActionSet{
		gram:             gram,
		builder:          builder,
		semStack:         newSemanticStack(),
		disableASTAction: true,
	}
}

// Shift is an implementation of SemanticActionSet.Shift method.
func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
	term := a.tokenToTerminal(tok)
	a.semStack.push(a.builder.Shift(a.gram.Terminal(term), tok))
}

// Reduce is an implementation of SemanticActionSet.Reduce method.
// AST action entries encode child selection: a positive entry e picks the
// handle element at index e-1 as-is; a negative entry e expands the children
// of the handle element at index (-e)-1 in its place (`...` operator).
func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
	lhs := a.gram.LHS(prodNum)

	// When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
	n := a.gram.AlternativeSymbolCount(prodNum)
	handle := a.semStack.pop(n)

	var astAct []int
	if !a.disableASTAction {
		astAct = a.gram.ASTAction(prodNum)
	}
	var children []SyntaxTreeNode
	if astAct != nil {
		// Count the number of children in advance to avoid frequent growth in a slice for children.
		{
			l := 0
			for _, e := range astAct {
				if e > 0 {
					l++
				} else {
					offset := e*-1 - 1
					l += handle[offset].ChildCount()
				}
			}

			children = make([]SyntaxTreeNode, l)
		}

		p := 0
		for _, e := range astAct {
			if e > 0 {
				offset := e - 1
				children[p] = handle[offset]
				p++
			} else {
				offset := e*-1 - 1
				for _, c := range handle[offset].ExpandChildren() {
					children[p] = c
					p++
				}
			}
		}
	} else {
		// If an alternative has no AST action, a driver generates
		// a node with the same structure as a CST.
		children = handle
	}

	a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children))
}

// Accept is an implementation of SemanticActionSet.Accept method.
func (a *SyntaxTreeActionSet) Accept() {
	top := a.semStack.pop(1)
	a.builder.Accept(top[0])
}

// TrapAndShiftError is an implementation of SemanticActionSet.TrapAndShiftError method.
func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) {
	a.semStack.pop(popped)
	a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error())))
}

// MissError is an implementation of SemanticActionSet.MissError method.
func (a *SyntaxTreeActionSet) MissError(cause VToken) {
}

// tokenToTerminal maps a token to its terminal symbol, folding EOF tokens
// into the grammar's EOF symbol.
func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
	if tok.EOF() {
		return a.gram.EOF()
	}

	return tok.TerminalID()
}

// semanticStack holds the tree nodes that parallel the parser's state stack.
type semanticStack struct {
	frames []SyntaxTreeNode
}

func newSemanticStack() *semanticStack {
	return &semanticStack{
		frames: make([]SyntaxTreeNode, 0, 100),
	}
}

func (s *semanticStack) push(f SyntaxTreeNode) {
	s.frames = append(s.frames, f)
}

// pop removes and returns the top n frames. Note the returned slice aliases
// the stack's backing array; callers consume it before pushing again.
func (s *semanticStack) pop(n int) []SyntaxTreeNode {
	fs := s.frames[len(s.frames)-n:]
	s.frames = s.frames[:len(s.frames)-n]

	return fs
}

// NodeType discriminates the three kinds of syntax tree nodes.
type NodeType int

const (
	NodeTypeError       = 0
	NodeTypeTerminal    = 1
	NodeTypeNonTerminal = 2
)

// Node is an implementation of SyntaxTreeNode interface.
type Node struct { Type NodeType KindName string Text string BytePos int ByteLen int Row int Col int Children []*Node } func (n *Node) MarshalJSON() ([]byte, error) { switch n.Type { case NodeTypeError: return json.Marshal(struct { Type NodeType `json:"type"` KindName string `json:"kind_name"` }{ Type: n.Type, KindName: n.KindName, }) case NodeTypeTerminal: if n.KindName == "" { return json.Marshal(struct { Type NodeType `json:"type"` Text string `json:"text"` Row int `json:"row"` Col int `json:"col"` }{ Type: n.Type, Text: n.Text, Row: n.Row, Col: n.Col, }) } return json.Marshal(struct { Type NodeType `json:"type"` KindName string `json:"kind_name"` Text string `json:"text"` Row int `json:"row"` Col int `json:"col"` }{ Type: n.Type, KindName: n.KindName, Text: n.Text, Row: n.Row, Col: n.Col, }) case NodeTypeNonTerminal: return json.Marshal(struct { Type NodeType `json:"type"` KindName string `json:"kind_name"` Children []*Node `json:"children"` }{ Type: n.Type, KindName: n.KindName, Children: n.Children, }) default: return nil, fmt.Errorf("invalid node type: %v", n.Type) } } // ChildCount is a implementation of SyntaxTreeNode.ChildCount. func (n *Node) ChildCount() int { return len(n.Children) } // ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren. func (n *Node) ExpandChildren() []SyntaxTreeNode { fs := make([]SyntaxTreeNode, len(n.Children)) for i, n := range n.Children { fs[i] = n } return fs } // PrintTree prints a syntax tree whose root is `node`. 
func PrintTree(w io.Writer, node *Node) { printTree(w, node, "", "") } func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { if node == nil { return } switch node.Type { case NodeTypeError: fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) case NodeTypeTerminal: fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text)) case NodeTypeNonTerminal: fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) num := len(node.Children) for i, child := range node.Children { var line string if num > 1 && i < num-1 { line = "├─ " } else { line = "└─ " } var prefix string if i >= num-1 { prefix = " " } else { prefix = "│ " } printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) } } } type grammarImpl struct { g *spec.CompiledGrammar } func NewGrammar(g *spec.CompiledGrammar) *grammarImpl { return &grammarImpl{ g: g, } } func (g *grammarImpl) InitialState() int { return g.g.Syntactic.InitialState } func (g *grammarImpl) StartProduction() int { return g.g.Syntactic.StartProduction } func (g *grammarImpl) RecoverProduction(prod int) bool { return g.g.Syntactic.RecoverProductions[prod] != 0 } func (g *grammarImpl) Action(state int, terminal int) int { return g.g.Syntactic.Action[state*g.g.Syntactic.TerminalCount+terminal] } func (g *grammarImpl) GoTo(state int, lhs int) int { return g.g.Syntactic.GoTo[state*g.g.Syntactic.NonTerminalCount+lhs] } func (g *grammarImpl) AlternativeSymbolCount(prod int) int { return g.g.Syntactic.AlternativeSymbolCounts[prod] } func (g *grammarImpl) TerminalCount() int { return g.g.Syntactic.TerminalCount } func (g *grammarImpl) SkipTerminal(terminal int) bool { return g.g.Syntactic.TerminalSkip[terminal] == 1 } func (g *grammarImpl) ErrorTrapperState(state int) bool { return g.g.Syntactic.ErrorTrapperStates[state] != 0 } func (g *grammarImpl) NonTerminal(nonTerminal int) string { return g.g.Syntactic.NonTerminals[nonTerminal] } func (g *grammarImpl) LHS(prod int) int { return 
g.g.Syntactic.LHSSymbols[prod] } func (g *grammarImpl) EOF() int { return g.g.Syntactic.EOFSymbol } func (g *grammarImpl) Error() int { return g.g.Syntactic.ErrorSymbol } func (g *grammarImpl) Terminal(terminal int) string { return g.g.Syntactic.Terminals[terminal] } func (g *grammarImpl) ASTAction(prod int) []int { return g.g.ASTAction.Entries[prod] } // go:embed parser.go var parserCoreSrc string // go:embed semantic_action.go var semActSrc string func GenParser(cgram *spec.CompiledGrammar, pkgName string) ([]byte, error) { var parserSrc string { fset := goToken.NewFileSet() f, err := parser.ParseFile(fset, "parser.go", parserCoreSrc, parser.ParseComments) if err != nil { return nil, err } var b strings.Builder err = format.Node(&b, fset, f) if err != nil { return nil, err } parserSrc = b.String() } var grammarSrc string { t, err := template.New("").Funcs(genGrammarTemplateFuncs(cgram)).Parse(grammarSrcTmplate) if err != nil { return nil, err } var b strings.Builder err = t.Execute(&b, map[string]interface{}{ "initialState": cgram.Syntactic.InitialState, "startProduction": cgram.Syntactic.StartProduction, "terminalCount": cgram.Syntactic.TerminalCount, "nonTerminalCount": cgram.Syntactic.NonTerminalCount, "eofSymbol": cgram.Syntactic.EOFSymbol, "errorSymbol": cgram.Syntactic.ErrorSymbol, }) if err != nil { return nil, err } grammarSrc = b.String() } var lexerSrc string { t, err := template.New("").Funcs(genLexerTemplateFuncs(cgram)).Parse(lexerSrcTmplate) if err != nil { return nil, err } var b strings.Builder err = t.Execute(&b, nil) if err != nil { return nil, err } lexerSrc = b.String() } var src string { tmpl := `// Code generated by vartan-go. DO NOT EDIT. 
{{ .parserSrc }} {{ .grammarSrc }} {{ .lexerSrc }} ` t, err := template.New("").Parse(tmpl) if err != nil { return nil, err } var b strings.Builder err = t.Execute(&b, map[string]string{ "parserSrc": parserSrc, "grammarSrc": grammarSrc, "lexerSrc": lexerSrc, }) if err != nil { return nil, err } src = b.String() } fset := goToken.NewFileSet() f, err := parser.ParseFile(fset, "", src, parser.ParseComments) if err != nil { return nil, err } f.Name = ast.NewIdent(pkgName) // Complete an import statement. for _, d := range f.Decls { gd, ok := d.(*ast.GenDecl) if !ok || gd.Tok != token.IMPORT { continue } gd.Specs = append(gd.Specs, &ast.ImportSpec{ Path: &ast.BasicLit{ Value: `"io"`, }, }) break } var b bytes.Buffer err = format.Node(&b, fset, f) if err != nil { return nil, err } return b.Bytes(), nil } const grammarSrcTmplate = ` type grammarImpl struct { recoverProductions []int action []int goTo []int alternativeSymbolCounts []int errorTrapperStates []int nonTerminals []string lhsSymbols []int terminals []string terminalSkip []int astActions [][]int } func NewGrammar() *grammarImpl { return &grammarImpl{ recoverProductions: {{ genRecoverProductions }}, action: {{ genAction }}, goTo: {{ genGoTo }}, alternativeSymbolCounts: {{ genAlternativeSymbolCounts }}, errorTrapperStates: {{ genErrorTrapperStates }}, nonTerminals: {{ genNonTerminals }}, lhsSymbols: {{ genLHSSymbols }}, terminals: {{ genTerminals }}, terminalSkip: {{ genTerminalSkip }}, astActions: {{ genASTActions }}, } } func (g *grammarImpl) InitialState() int { return {{ .initialState }} } func (g *grammarImpl) StartProduction() int { return {{ .startProduction }} } func (g *grammarImpl) RecoverProduction(prod int) bool { return g.recoverProductions[prod] != 0 } func (g *grammarImpl) Action(state int, terminal int) int { return g.action[state*{{ .terminalCount }}+terminal] } func (g *grammarImpl) GoTo(state int, lhs int) int { return g.goTo[state*{{ .nonTerminalCount }}+lhs] } func (g *grammarImpl) 
AlternativeSymbolCount(prod int) int { return g.alternativeSymbolCounts[prod] } func (g *grammarImpl) TerminalCount() int { return {{ .terminalCount }} } func (g *grammarImpl) SkipTerminal(terminal int) bool { return g.terminalSkip[terminal] == 1 } func (g *grammarImpl) ErrorTrapperState(state int) bool { return g.errorTrapperStates[state] != 0 } func (g *grammarImpl) NonTerminal(nonTerminal int) string { return g.nonTerminals[nonTerminal] } func (g *grammarImpl) LHS(prod int) int { return g.lhsSymbols[prod] } func (g *grammarImpl) EOF() int { return {{ .eofSymbol }} } func (g *grammarImpl) Error() int { return {{ .errorSymbol }} } func (g *grammarImpl) Terminal(terminal int) string { return g.terminals[terminal] } func (g *grammarImpl) ASTAction(prod int) []int { return g.astActions[prod] } ` func genGrammarTemplateFuncs(cgram *spec.CompiledGrammar) template.FuncMap { return template.FuncMap{ "genRecoverProductions": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.RecoverProductions { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genAction": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.Action { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genGoTo": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.GoTo { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genAlternativeSymbolCounts": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.AlternativeSymbolCounts { fmt.Fprintf(&b, "%v, ", v) if c == 
20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genErrorTrapperStates": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.ErrorTrapperStates { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genNonTerminals": func() string { var b strings.Builder fmt.Fprintf(&b, "[]string{\n") for _, v := range cgram.Syntactic.NonTerminals { fmt.Fprintf(&b, "%v,\n", strconv.Quote(v)) } fmt.Fprintf(&b, "}") return b.String() }, "genLHSSymbols": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.LHSSymbols { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genTerminals": func() string { var b strings.Builder fmt.Fprintf(&b, "[]string{\n") for _, v := range cgram.Syntactic.Terminals { fmt.Fprintf(&b, "%v,\n", strconv.Quote(v)) } fmt.Fprintf(&b, "}") return b.String() }, "genTerminalSkip": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.TerminalSkip { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, "genASTActions": func() string { var b strings.Builder fmt.Fprintf(&b, "[][]int{\n") for _, entries := range cgram.ASTAction.Entries { if len(entries) == 0 { fmt.Fprintf(&b, "nil,\n") continue } fmt.Fprintf(&b, "{\n") c := 1 for _, v := range entries { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "},\n") } fmt.Fprintf(&b, "}") return b.String() }, } } const lexerSrcTmplate = ` type vToken struct { terminalID int tok 
*Token } func (t *vToken) TerminalID() int { return t.terminalID } func (t *vToken) Lexeme() []byte { return t.tok.Lexeme } func (t *vToken) EOF() bool { return t.tok.EOF } func (t *vToken) Invalid() bool { return t.tok.Invalid } func (t *vToken) BytePosition() (int, int) { return t.tok.BytePos, t.tok.ByteLen } func (t *vToken) Position() (int, int) { return t.tok.Row, t.tok.Col } var kindToTerminal = {{ genKindToTerminal }} type tokenStream struct { lex *Lexer kindToTerminal []int } func NewTokenStream(src io.Reader) (*tokenStream, error) { lex, err := NewLexer(NewLexSpec(), src) if err != nil { return nil, err } return &tokenStream{ lex: lex, }, nil } func (t *tokenStream) Next() (VToken, error) { tok, err := t.lex.Next() if err != nil { return nil, err } return &vToken{ terminalID: kindToTerminal[tok.KindID], tok: tok, }, nil } ` func genLexerTemplateFuncs(cgram *spec.CompiledGrammar) template.FuncMap { return template.FuncMap{ "genKindToTerminal": func() string { var b strings.Builder fmt.Fprintf(&b, "[]int{\n") c := 1 for _, v := range cgram.Syntactic.KindToTerminal { fmt.Fprintf(&b, "%v, ", v) if c == 20 { fmt.Fprintf(&b, "\n") c = 1 } else { c++ } } if c > 1 { fmt.Fprintf(&b, "\n") } fmt.Fprintf(&b, "}") return b.String() }, } } func GenSemanticAction(pkgName string) ([]byte, error) { var src string { tmpl := `// Code generated by vartan-go. DO NOT EDIT. 
{{ .semActSrc }} ` t, err := template.New("").Parse(tmpl) if err != nil { return nil, err } var b strings.Builder err = t.Execute(&b, map[string]string{ "semActSrc": semActSrc, }) if err != nil { return nil, err } src = b.String() } fset := goToken.NewFileSet() f, err := parser.ParseFile(fset, "", src, parser.ParseComments) if err != nil { return nil, err } f.Name = ast.NewIdent(pkgName) var b bytes.Buffer err = format.Node(&b, fset, f) if err != nil { return nil, err } return b.Bytes(), nil } type vToken struct { terminalID int tok *lexer.Token } func (t *vToken) TerminalID() int { return t.terminalID } func (t *vToken) Lexeme() []byte { return t.tok.Lexeme } func (t *vToken) EOF() bool { return t.tok.EOF } func (t *vToken) Invalid() bool { return t.tok.Invalid } func (t *vToken) BytePosition() (int, int) { return t.tok.BytePos, t.tok.ByteLen } func (t *vToken) Position() (int, int) { return t.tok.Row, t.tok.Col } type tokenStream struct { lex *lexer.Lexer kindToTerminal []int } func NewTokenStream(g *spec.CompiledGrammar, src io.Reader) (TokenStream, error) { lex, err := lexer.NewLexer(lexer.NewLexSpec(g.Lexical), src) if err != nil { return nil, err } return &tokenStream{ lex: lex, kindToTerminal: g.Syntactic.KindToTerminal, }, nil } func (l *tokenStream) Next() (VToken, error) { tok, err := l.lex.Next() if err != nil { return nil, err } return &vToken{ terminalID: l.kindToTerminal[tok.KindID], tok: tok, }, nil }