author | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-25 01:55:07 +0900
committer | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-27 00:34:55 +0900
commit | d3867e0769a90be422e2514e16017236e040a130 (patch)
tree | 83b80578f0b1e0d37a975b8ac7fbadb486a07e84
parent | Use grammar via an interface (diff)
download | urubu-d3867e0769a90be422e2514e16017236e040a130.tar.gz, urubu-d3867e0769a90be422e2514e16017236e040a130.tar.xz
Use a lexer via an interface
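This commit makes the parser consume tokens through a new `TokenStream` interface instead of owning a `*mldriver.Lexer`, so the caller constructs the token stream itself and passes it to `NewParser` alongside the `Grammar`. Below is a minimal sketch of the new wiring, assuming a compiled grammar loaded the same way `cmd/vartan/parse.go` does; the `parseSource` helper is illustrative and not part of this commit:

```go
package main

import (
	"io"

	"github.com/nihei9/vartan/driver"
	"github.com/nihei9/vartan/spec"
)

// parseSource wires the new pieces together: the caller, not the parser,
// now owns lexer construction via driver.NewTokenStream.
func parseSource(cgram *spec.CompiledGrammar, src io.Reader) error {
	toks, err := driver.NewTokenStream(cgram, src)
	if err != nil {
		return err
	}

	p, err := driver.NewParser(toks, driver.NewGrammar(cgram))
	if err != nil {
		return err
	}

	// Run syntax analysis, as the vartan CLI does after this change.
	return p.Parse()
}
```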
-rw-r--r-- | cmd/vartan/parse.go | 17
-rw-r--r-- | driver/conflict_test.go | 7
-rw-r--r-- | driver/lac_test.go | 14
-rw-r--r-- | driver/parser.go | 66
-rw-r--r-- | driver/parser_test.go | 7
-rw-r--r-- | driver/semantic_action.go | 33
-rw-r--r-- | driver/semantic_action_test.go | 20
-rw-r--r-- | driver/spec.go | 51
-rw-r--r-- | driver/syntax_error_test.go | 7
-rw-r--r-- | driver/token_stream.go | 69
10 files changed, 194 insertions(+), 97 deletions(-)
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index 0e95405..caf1cbd 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -98,7 +98,12 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
 		}
 	}
 
-	p, err = driver.NewParser(driver.NewGrammar(cgram), src, opts...)
+	toks, err := driver.NewTokenStream(cgram, src)
+	if err != nil {
+		return err
+	}
+
+	p, err = driver.NewParser(toks, driver.NewGrammar(cgram), opts...)
 	if err != nil {
 		return err
 	}
@@ -115,13 +120,13 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
 			var msg string
 			switch {
-			case tok.EOF:
+			case tok.EOF():
 				msg = "<eof>"
-			case tok.Invalid:
-				msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme))
+			case tok.Invalid():
+				msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))
 			default:
-				k := cgram.LexicalSpecification.Maleeni.Spec.KindNames[tok.KindID]
-				msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme), k)
+				t := cgram.ParsingTable.Terminals[tok.TerminalID()]
+				msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), t)
 			}
 
 			fmt.Fprintf(os.Stderr, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg)
diff --git a/driver/conflict_test.go b/driver/conflict_test.go
index 4517b33..c827107 100644
--- a/driver/conflict_test.go
+++ b/driver/conflict_test.go
@@ -349,8 +349,13 @@ assign: '=';
 				t.Fatal(err)
 			}
 
+			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+			if err != nil {
+				t.Fatal(err)
+			}
+
 			treeAct := NewSyntaxTreeActionSet(gram, false, true)
-			p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(treeAct))
+			p, err := NewParser(toks, NewGrammar(gram), SemanticAction(treeAct))
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/driver/lac_test.go b/driver/lac_test.go
index 9b7bf40..c127c0b 100644
--- a/driver/lac_test.go
+++ b/driver/lac_test.go
@@ -66,7 +66,12 @@ d: 'd';
 		gram: gram,
 	}
 
-	p, err := NewParser(NewGrammar(gram), strings.NewReader(src), SemanticAction(semAct))
+	toks, err := NewTokenStream(gram, strings.NewReader(src))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -92,7 +97,12 @@ d: 'd';
 		gram: gram,
 	}
 
-	p, err := NewParser(NewGrammar(gram), strings.NewReader(src), SemanticAction(semAct), DisableLAC())
+	toks, err := NewTokenStream(gram, strings.NewReader(src))
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
 	if err != nil {
 		t.Fatal(err)
 	}
diff --git a/driver/parser.go b/driver/parser.go
index 08a4c4a..b765982 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -2,9 +2,6 @@ package driver
 
 import (
 	"fmt"
-	"io"
-
-	mldriver "github.com/nihei9/maleeni/driver"
 )
 
 type Grammar interface {
@@ -35,9 +32,6 @@ type Grammar interface {
 	// RecoverProduction returns true when a production has the recover directive.
 	RecoverProduction(prod int) bool
 
-	// LexicalSpecification returns a lexical specification.
-	LexicalSpecification() mldriver.LexSpec
-
 	// TerminalCount returns a terminal symbol count of grammar.
 	TerminalCount() int
@@ -52,19 +46,37 @@ type Grammar interface {
 
 	// TerminalAlias returns an alias for a terminal.
 	TerminalAlias(terminal int) string
+}
+
+type Token interface {
+	// TerminalID returns a terminal ID.
+	TerminalID() int
+
+	// Lexeme returns a lexeme.
+	Lexeme() []byte
+
+	// EOF returns true when a token represents EOF.
+	EOF() bool
 
-	// Skip returns true when a terminal symbol must be skipped.
-	Skip(kind mldriver.KindID) bool
+	// Invalid returns true when a token is invalid.
+	Invalid() bool
 
-	// LexicalKindToTerminal maps a lexical kind to a terminal symbol.
-	LexicalKindToTerminal(kind mldriver.KindID) int
+	// Position returns (row, column) pair.
+	Position() (int, int)
+
+	// Skip returns true when a token must be skipped on syntax analysis.
+	Skip() bool
+}
+
+type TokenStream interface {
+	Next() (Token, error)
 }
 
 type SyntaxError struct {
 	Row               int
 	Col               int
 	Message           string
-	Token             *mldriver.Token
+	Token             Token
 	ExpectedTerminals []string
 }
@@ -86,8 +98,8 @@ func SemanticAction(semAct SemanticActionSet) ParserOption {
 }
 
 type Parser struct {
+	toks       TokenStream
 	gram       Grammar
-	lex        *mldriver.Lexer
 	stateStack *stateStack
 	semAct     SemanticActionSet
 	disableLAC bool
@@ -96,15 +108,10 @@ type Parser struct {
 	synErrs []*SyntaxError
 }
 
-func NewParser(gram Grammar, src io.Reader, opts ...ParserOption) (*Parser, error) {
-	lex, err := mldriver.NewLexer(gram.LexicalSpecification(), src)
-	if err != nil {
-		return nil, err
-	}
-
+func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) {
 	p := &Parser{
+		toks:       toks,
 		gram:       gram,
-		lex:        lex,
 		stateStack: &stateStack{},
 	}
@@ -187,7 +194,7 @@ ACTION_LOOP:
 			if err != nil {
 				return err
 			}
-			if tok.EOF {
+			if tok.EOF() {
 				if p.semAct != nil {
 					p.semAct.MissError(tok)
 				}
@@ -198,9 +205,10 @@ ACTION_LOOP:
 				continue ACTION_LOOP
 			}
 
+			row, col := tok.Position()
 			p.synErrs = append(p.synErrs, &SyntaxError{
-				Row:               tok.Row,
-				Col:               tok.Col,
+				Row:               row,
+				Col:               col,
 				Message:           "unexpected token",
 				Token:             tok,
 				ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
@@ -261,17 +269,17 @@ func (p *Parser) validateLookahead(term int) bool {
 	}
 }
 
-func (p *Parser) nextToken() (*mldriver.Token, error) {
+func (p *Parser) nextToken() (Token, error) {
 	for {
 		// We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
 		// and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
 		// a syntax error because the parser cannot find an entry corresponding to the invalid token.
-		tok, err := p.lex.Next()
+		tok, err := p.toks.Next()
 		if err != nil {
 			return nil, err
 		}
 
-		if p.gram.Skip(tok.KindID) {
+		if tok.Skip() {
 			continue
 		}
@@ -279,15 +287,15 @@ func (p *Parser) nextToken() (*mldriver.Token, error) {
 	}
 }
 
-func (p *Parser) tokenToTerminal(tok *mldriver.Token) int {
-	if tok.EOF {
+func (p *Parser) tokenToTerminal(tok Token) int {
+	if tok.EOF() {
 		return p.gram.EOF()
 	}
 
-	return p.gram.LexicalKindToTerminal(tok.KindID)
+	return tok.TerminalID()
 }
 
-func (p *Parser) lookupAction(tok *mldriver.Token) int {
+func (p *Parser) lookupAction(tok Token) int {
 	if !p.disableLAC {
 		term := p.tokenToTerminal(tok)
 		if !p.validateLookahead(term) {
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 69780bc..4e60f52 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -767,8 +767,13 @@ error: 'error' #skip;
 				t.Fatal(err)
 			}
 
+			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+			if err != nil {
+				t.Fatal(err)
+			}
+
 			treeAct := NewSyntaxTreeActionSet(gram, true, true)
-			p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(treeAct))
+			p, err := NewParser(toks, NewGrammar(gram), SemanticAction(treeAct))
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/driver/semantic_action.go b/driver/semantic_action.go
index 8bcbd22..3023a36 100644
--- a/driver/semantic_action.go
+++ b/driver/semantic_action.go
@@ -4,14 +4,13 @@ import (
 	"fmt"
 	"io"
 
-	mldriver "github.com/nihei9/maleeni/driver"
 	"github.com/nihei9/vartan/spec"
 )
 
 type SemanticActionSet interface {
 	// Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to
 	// the symbol. When the driver recovered from an error state by shifting the token, `recovered` is true.
-	Shift(tok *mldriver.Token, recovered bool)
+	Shift(tok Token, recovered bool)
 
 	// Reduce runs when the driver reduces an RHS of a production to its LHS. `prodNum` is a number of
 	// the production. When the driver recovered from an error state by reducing the production,
@@ -26,10 +25,10 @@ type SemanticActionSet interface {
 	// from the state stack.
 	// Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
 	// corresponding to the error symbol doesn't exist.
-	TrapAndShiftError(cause *mldriver.Token, popped int)
+	TrapAndShiftError(cause Token, popped int)
 
 	// MissError runs when the driver fails to trap a syntax error. `cause` is a token that caused a syntax error.
-	MissError(cause *mldriver.Token)
+	MissError(cause Token)
 }
 
 var _ SemanticActionSet = &SyntaxTreeActionSet{}
@@ -95,25 +94,27 @@ func NewSyntaxTreeActionSet(gram *spec.CompiledGrammar, makeAST bool, makeCST bo
 	}
 }
 
-func (a *SyntaxTreeActionSet) Shift(tok *mldriver.Token, recovered bool) {
+func (a *SyntaxTreeActionSet) Shift(tok Token, recovered bool) {
 	term := a.tokenToTerminal(tok)
 
 	var ast *Node
 	var cst *Node
 	if a.makeAST {
+		row, col := tok.Position()
 		ast = &Node{
 			KindName: a.gram.ParsingTable.Terminals[term],
-			Text:     string(tok.Lexeme),
-			Row:      tok.Row,
-			Col:      tok.Col,
+			Text:     string(tok.Lexeme()),
+			Row:      row,
+			Col:      col,
 		}
 	}
 	if a.makeCST {
+		row, col := tok.Position()
 		cst = &Node{
 			KindName: a.gram.ParsingTable.Terminals[term],
-			Text:     string(tok.Lexeme),
-			Row:      tok.Row,
-			Col:      tok.Col,
+			Text:     string(tok.Lexeme()),
+			Row:      row,
+			Col:      col,
 		}
 	}
@@ -205,7 +206,7 @@ func (a *SyntaxTreeActionSet) Accept() {
 	a.ast = top[0].ast
 }
 
-func (a *SyntaxTreeActionSet) TrapAndShiftError(cause *mldriver.Token, popped int) {
+func (a *SyntaxTreeActionSet) TrapAndShiftError(cause Token, popped int) {
 	a.semStack.pop(popped)
 
 	errSym := a.gram.ParsingTable.ErrorSymbol
@@ -229,7 +230,7 @@ func (a *SyntaxTreeActionSet) TrapAndShiftError(cause *mldriver.Token, popped in
 	})
 }
 
-func (a *SyntaxTreeActionSet) MissError(cause *mldriver.Token) {
+func (a *SyntaxTreeActionSet) MissError(cause Token) {
 }
 
 func (a *SyntaxTreeActionSet) CST() *Node {
@@ -240,12 +241,12 @@ func (a *SyntaxTreeActionSet) AST() *Node {
 	return a.ast
 }
 
-func (a *SyntaxTreeActionSet) tokenToTerminal(tok *mldriver.Token) int {
-	if tok.EOF {
+func (a *SyntaxTreeActionSet) tokenToTerminal(tok Token) int {
+	if tok.EOF() {
 		return a.gram.ParsingTable.EOFSymbol
 	}
 
-	return a.gram.LexicalSpecification.Maleeni.KindToTerminal[tok.KindID]
+	return tok.TerminalID()
 }
 
 type semanticFrame struct {
diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go
index d3f15a8..9c66a85 100644
--- a/driver/semantic_action_test.go
+++ b/driver/semantic_action_test.go
@@ -5,7 +5,6 @@ import (
 	"strings"
 	"testing"
 
-	mldriver "github.com/nihei9/maleeni/driver"
 	"github.com/nihei9/vartan/grammar"
 	"github.com/nihei9/vartan/spec"
 )
@@ -15,12 +14,12 @@ type testSemAct struct {
 	actLog []string
 }
 
-func (a *testSemAct) Shift(tok *mldriver.Token, recovered bool) {
-	k := a.gram.LexicalSpecification.Maleeni.Spec.KindNames[tok.KindID]
+func (a *testSemAct) Shift(tok Token, recovered bool) {
+	t := a.gram.ParsingTable.Terminals[tok.TerminalID()]
 	if recovered {
-		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", k))
+		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
 	} else {
-		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", k))
+		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
 	}
 }
@@ -38,11 +37,11 @@ func (a *testSemAct) Accept() {
 	a.actLog = append(a.actLog, "accept")
 }
 
-func (a *testSemAct) TrapAndShiftError(cause *mldriver.Token, popped int) {
+func (a *testSemAct) TrapAndShiftError(cause Token, popped int) {
 	a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
 }
 
-func (a *testSemAct) MissError(cause *mldriver.Token) {
+func (a *testSemAct) MissError(cause Token) {
 	a.actLog = append(a.actLog, "miss")
 }
@@ -193,10 +192,15 @@ char: "[a-z]";
 				t.Fatal(err)
 			}
 
+			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+			if err != nil {
+				t.Fatal(err)
+			}
+
 			semAct := &testSemAct{
 				gram: gram,
 			}
-			p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(semAct))
+			p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/driver/spec.go b/driver/spec.go
index 71935b4..a935577 100644
--- a/driver/spec.go
+++ b/driver/spec.go
@@ -1,84 +1,69 @@
 package driver
 
-import (
-	mldriver "github.com/nihei9/maleeni/driver"
-	"github.com/nihei9/vartan/spec"
-)
+import "github.com/nihei9/vartan/spec"
 
-type gram struct {
+type grammarImpl struct {
 	g *spec.CompiledGrammar
 }
 
-func NewGrammar(g *spec.CompiledGrammar) *gram {
-	return &gram{
+func NewGrammar(g *spec.CompiledGrammar) *grammarImpl {
+	return &grammarImpl{
 		g: g,
 	}
 }
 
-func (g *gram) LexicalSpecification() mldriver.LexSpec {
-	return mldriver.NewLexSpec(g.g.LexicalSpecification.Maleeni.Spec)
-}
-
-func (g *gram) Class() string {
+func (g *grammarImpl) Class() string {
 	return g.g.ParsingTable.Class
 }
 
-func (g *gram) InitialState() int {
+func (g *grammarImpl) InitialState() int {
 	return g.g.ParsingTable.InitialState
 }
 
-func (g *gram) StartProduction() int {
+func (g *grammarImpl) StartProduction() int {
 	return g.g.ParsingTable.StartProduction
 }
 
-func (g *gram) RecoverProduction(prod int) bool {
+func (g *grammarImpl) RecoverProduction(prod int) bool {
 	return g.g.ParsingTable.RecoverProductions[prod] != 0
 }
 
-func (g *gram) Action(state int, terminal int) int {
+func (g *grammarImpl) Action(state int, terminal int) int {
 	return g.g.ParsingTable.Action[state*g.g.ParsingTable.TerminalCount+terminal]
 }
 
-func (g *gram) GoTo(state int, lhs int) int {
+func (g *grammarImpl) GoTo(state int, lhs int) int {
 	return g.g.ParsingTable.GoTo[state*g.g.ParsingTable.NonTerminalCount+lhs]
 }
 
-func (g *gram) AlternativeSymbolCount(prod int) int {
+func (g *grammarImpl) AlternativeSymbolCount(prod int) int {
 	return g.g.ParsingTable.AlternativeSymbolCounts[prod]
 }
 
-func (g *gram) TerminalCount() int {
+func (g *grammarImpl) TerminalCount() int {
 	return g.g.ParsingTable.TerminalCount
 }
 
-func (g *gram) ErrorTrapperState(state int) bool {
+func (g *grammarImpl) ErrorTrapperState(state int) bool {
 	return g.g.ParsingTable.ErrorTrapperStates[state] != 0
 }
 
-func (g *gram) LHS(prod int) int {
+func (g *grammarImpl) LHS(prod int) int {
 	return g.g.ParsingTable.LHSSymbols[prod]
 }
 
-func (g *gram) EOF() int {
+func (g *grammarImpl) EOF() int {
 	return g.g.ParsingTable.EOFSymbol
 }
 
-func (g *gram) Error() int {
+func (g *grammarImpl) Error() int {
 	return g.g.ParsingTable.ErrorSymbol
 }
 
-func (g *gram) Terminal(terminal int) string {
+func (g *grammarImpl) Terminal(terminal int) string {
 	return g.g.ParsingTable.Terminals[terminal]
 }
 
-func (g *gram) TerminalAlias(terminal int) string {
+func (g *grammarImpl) TerminalAlias(terminal int) string {
 	return g.g.LexicalSpecification.Maleeni.KindAliases[terminal]
 }
-
-func (g *gram) Skip(kind mldriver.KindID) bool {
-	return g.g.LexicalSpecification.Maleeni.Skip[kind] > 0
-}
-
-func (g *gram) LexicalKindToTerminal(kind mldriver.KindID) int {
-	return g.g.LexicalSpecification.Maleeni.KindToTerminal[kind]
-}
diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go
index 8feec91..a7f363f 100644
--- a/driver/syntax_error_test.go
+++ b/driver/syntax_error_test.go
@@ -119,7 +119,12 @@ c: 'c';
 				t.Fatal(err)
 			}
 
-			p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src))
+			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+			if err != nil {
+				t.Fatal(err)
+			}
+
+			p, err := NewParser(toks, NewGrammar(gram))
 			if err != nil {
 				t.Fatal(err)
 			}
diff --git a/driver/token_stream.go b/driver/token_stream.go
new file mode 100644
index 0000000..feb86ae
--- /dev/null
+++ b/driver/token_stream.go
@@ -0,0 +1,69 @@
+package driver
+
+import (
+	"io"
+
+	mldriver "github.com/nihei9/maleeni/driver"
+	"github.com/nihei9/vartan/spec"
+)
+
+type token struct {
+	terminalID int
+	skip       bool
+	tok        *mldriver.Token
+}
+
+func (t *token) TerminalID() int {
+	return t.terminalID
+}
+
+func (t *token) Lexeme() []byte {
+	return t.tok.Lexeme
+}
+
+func (t *token) EOF() bool {
+	return t.tok.EOF
+}
+
+func (t *token) Invalid() bool {
+	return t.tok.Invalid
+}
+
+func (t *token) Skip() bool {
+	return t.skip
+}
+
+func (t *token) Position() (int, int) {
+	return t.tok.Row, t.tok.Col
+}
+
+type tokenStream struct {
+	lex            *mldriver.Lexer
+	kindToTerminal []int
+	skip           []int
+}
+
+func NewTokenStream(g *spec.CompiledGrammar, src io.Reader) (TokenStream, error) {
+	lex, err := mldriver.NewLexer(mldriver.NewLexSpec(g.LexicalSpecification.Maleeni.Spec), src)
+	if err != nil {
+		return nil, err
+	}
+
+	return &tokenStream{
+		lex:            lex,
+		kindToTerminal: g.LexicalSpecification.Maleeni.KindToTerminal,
+		skip:           g.LexicalSpecification.Maleeni.Skip,
+	}, nil
+}
+
+func (l *tokenStream) Next() (Token, error) {
+	tok, err := l.lex.Next()
+	if err != nil {
+		return nil, err
+	}
+	return &token{
+		terminalID: l.kindToTerminal[tok.KindID],
+		skip:       l.skip[tok.KindID] > 0,
+		tok:        tok,
+	}, nil
+}
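Since `Parser` now depends only on the `Token` and `TokenStream` interfaces, token sources other than the maleeni-backed `tokenStream` can drive it; that decoupling is the point of the commit. Below is a sketch of a slice-backed stream that could feed pre-computed tokens to the parser, e.g. in tests. All names here are hypothetical illustrations of the new contracts, not code from this commit:

```go
package driver

// staticToken is a hypothetical in-memory implementation of Token.
type staticToken struct {
	terminalID int
	lexeme     []byte
	row, col   int
	eof        bool
}

func (t *staticToken) TerminalID() int      { return t.terminalID }
func (t *staticToken) Lexeme() []byte       { return t.lexeme }
func (t *staticToken) EOF() bool            { return t.eof }
func (t *staticToken) Invalid() bool        { return false }
func (t *staticToken) Skip() bool           { return false }
func (t *staticToken) Position() (int, int) { return t.row, t.col }

// staticTokenStream replays a fixed token slice. The last element is
// expected to be an EOF token and is repeated once the slice runs out,
// mirroring how a lexer keeps reporting EOF.
type staticTokenStream struct {
	toks []*staticToken
	pos  int
}

func (s *staticTokenStream) Next() (Token, error) {
	if s.pos >= len(s.toks) {
		return s.toks[len(s.toks)-1], nil
	}
	tok := s.toks[s.pos]
	s.pos++
	return tok, nil
}
```

A stream like this can be handed straight to `NewParser(toks, NewGrammar(gram))`, which is exactly the seam the new interfaces open up.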