author    Ryo Nihei <nihei.dev@gmail.com>  2022-03-25 01:55:07 +0900
committer Ryo Nihei <nihei.dev@gmail.com>  2022-03-27 00:34:55 +0900
commit    d3867e0769a90be422e2514e16017236e040a130 (patch)
tree      83b80578f0b1e0d37a975b8ac7fbadb486a07e84
parent    Use grammar via an interface (diff)
Use a lexer via an interface
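
The parser no longer constructs a maleeni lexer internally. Instead, it reads
tokens through new driver-defined Token and TokenStream interfaces, and the
maleeni dependency moves out of driver/parser.go into the new
driver/token_stream.go. Callers now build the token stream themselves and pass
it to the parser, so call sites change from

    p, err := driver.NewParser(driver.NewGrammar(cgram), src, opts...)

to

    toks, err := driver.NewTokenStream(cgram, src)
    if err != nil {
        return err
    }
    p, err := driver.NewParser(toks, driver.NewGrammar(cgram), opts...)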
-rw-r--r--  cmd/vartan/parse.go             17
-rw-r--r--  driver/conflict_test.go          7
-rw-r--r--  driver/lac_test.go              14
-rw-r--r--  driver/parser.go                66
-rw-r--r--  driver/parser_test.go            7
-rw-r--r--  driver/semantic_action.go       33
-rw-r--r--  driver/semantic_action_test.go  20
-rw-r--r--  driver/spec.go                  51
-rw-r--r--  driver/syntax_error_test.go      7
-rw-r--r--  driver/token_stream.go          69
10 files changed, 194 insertions(+), 97 deletions(-)
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index 0e95405..caf1cbd 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -98,7 +98,12 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
}
}
- p, err = driver.NewParser(driver.NewGrammar(cgram), src, opts...)
+ toks, err := driver.NewTokenStream(cgram, src)
+ if err != nil {
+ return err
+ }
+
+ p, err = driver.NewParser(toks, driver.NewGrammar(cgram), opts...)
if err != nil {
return err
}
@@ -115,13 +120,13 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
var msg string
switch {
- case tok.EOF:
+ case tok.EOF():
msg = "<eof>"
- case tok.Invalid:
- msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme))
+ case tok.Invalid():
+ msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))
default:
- k := cgram.LexicalSpecification.Maleeni.Spec.KindNames[tok.KindID]
- msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme), k)
+ t := cgram.ParsingTable.Terminals[tok.TerminalID()]
+ msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), t)
}
fmt.Fprintf(os.Stderr, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg)
diff --git a/driver/conflict_test.go b/driver/conflict_test.go
index 4517b33..c827107 100644
--- a/driver/conflict_test.go
+++ b/driver/conflict_test.go
@@ -349,8 +349,13 @@ assign: '=';
t.Fatal(err)
}
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
treeAct := NewSyntaxTreeActionSet(gram, false, true)
- p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(treeAct))
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(treeAct))
if err != nil {
t.Fatal(err)
}
diff --git a/driver/lac_test.go b/driver/lac_test.go
index 9b7bf40..c127c0b 100644
--- a/driver/lac_test.go
+++ b/driver/lac_test.go
@@ -66,7 +66,12 @@ d: 'd';
gram: gram,
}
- p, err := NewParser(NewGrammar(gram), strings.NewReader(src), SemanticAction(semAct))
+ toks, err := NewTokenStream(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
if err != nil {
t.Fatal(err)
}
@@ -92,7 +97,12 @@ d: 'd';
gram: gram,
}
- p, err := NewParser(NewGrammar(gram), strings.NewReader(src), SemanticAction(semAct), DisableLAC())
+ toks, err := NewTokenStream(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
if err != nil {
t.Fatal(err)
}
diff --git a/driver/parser.go b/driver/parser.go
index 08a4c4a..b765982 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -2,9 +2,6 @@ package driver
import (
"fmt"
- "io"
-
- mldriver "github.com/nihei9/maleeni/driver"
)
type Grammar interface {
@@ -35,9 +32,6 @@ type Grammar interface {
// RecoverProduction returns true when a production has the recover directive.
RecoverProduction(prod int) bool
- // LexicalSpecification returns a lexical specification.
- LexicalSpecification() mldriver.LexSpec
-
// TerminalCount returns a terminal symbol count of grammar.
TerminalCount() int
@@ -52,19 +46,37 @@ type Grammar interface {
// TerminalAlias returns an alias for a terminal.
TerminalAlias(terminal int) string
+}
+
+type Token interface {
+ // TerminalID returns a terminal ID.
+ TerminalID() int
+
+ // Lexeme returns a lexeme.
+ Lexeme() []byte
+
+ // EOF returns true when a token represents EOF.
+ EOF() bool
- // Skip returns true when a terminal symbol must be skipped.
- Skip(kind mldriver.KindID) bool
+ // Invalid returns true when a token is invalid.
+ Invalid() bool
- // LexicalKindToTerminal maps a lexical kind to a terminal symbol.
- LexicalKindToTerminal(kind mldriver.KindID) int
+ // Position returns the (row, column) pair of a token.
+ Position() (int, int)
+
+ // Skip returns true when a token must be skipped during syntax analysis.
+ Skip() bool
+}
+
+type TokenStream interface {
+ Next() (Token, error)
}
type SyntaxError struct {
Row int
Col int
Message string
- Token *mldriver.Token
+ Token Token
ExpectedTerminals []string
}
@@ -86,8 +98,8 @@ func SemanticAction(semAct SemanticActionSet) ParserOption {
}
type Parser struct {
+ toks TokenStream
gram Grammar
- lex *mldriver.Lexer
stateStack *stateStack
semAct SemanticActionSet
disableLAC bool
@@ -96,15 +108,10 @@ type Parser struct {
synErrs []*SyntaxError
}
-func NewParser(gram Grammar, src io.Reader, opts ...ParserOption) (*Parser, error) {
- lex, err := mldriver.NewLexer(gram.LexicalSpecification(), src)
- if err != nil {
- return nil, err
- }
-
+func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) {
p := &Parser{
+ toks: toks,
gram: gram,
- lex: lex,
stateStack: &stateStack{},
}
@@ -187,7 +194,7 @@ ACTION_LOOP:
if err != nil {
return err
}
- if tok.EOF {
+ if tok.EOF() {
if p.semAct != nil {
p.semAct.MissError(tok)
}
@@ -198,9 +205,10 @@ ACTION_LOOP:
continue ACTION_LOOP
}
+ row, col := tok.Position()
p.synErrs = append(p.synErrs, &SyntaxError{
- Row: tok.Row,
- Col: tok.Col,
+ Row: row,
+ Col: col,
Message: "unexpected token",
Token: tok,
ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
@@ -261,17 +269,17 @@ func (p *Parser) validateLookahead(term int) bool {
}
}
-func (p *Parser) nextToken() (*mldriver.Token, error) {
+func (p *Parser) nextToken() (Token, error) {
for {
// We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
// and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
// a syntax error because the parser cannot find an entry corresponding to the invalid token.
- tok, err := p.lex.Next()
+ tok, err := p.toks.Next()
if err != nil {
return nil, err
}
- if p.gram.Skip(tok.KindID) {
+ if tok.Skip() {
continue
}
@@ -279,15 +287,15 @@ func (p *Parser) nextToken() (*mldriver.Token, error) {
}
}
-func (p *Parser) tokenToTerminal(tok *mldriver.Token) int {
- if tok.EOF {
+func (p *Parser) tokenToTerminal(tok Token) int {
+ if tok.EOF() {
return p.gram.EOF()
}
- return p.gram.LexicalKindToTerminal(tok.KindID)
+ return tok.TerminalID()
}
-func (p *Parser) lookupAction(tok *mldriver.Token) int {
+func (p *Parser) lookupAction(tok Token) int {
if !p.disableLAC {
term := p.tokenToTerminal(tok)
if !p.validateLookahead(term) {
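
Because the parser now depends only on the Token and TokenStream interfaces
above, any tokenizer can drive it, not just maleeni. As a minimal sketch, a
slice-backed stream like the following would satisfy both interfaces (the
staticToken and staticStream types are illustrative only and are not part of
this commit):

    // staticToken is a pre-built token satisfying the Token interface.
    type staticToken struct {
        terminalID int
        lexeme     []byte
        eof        bool
        row, col   int
    }

    func (t *staticToken) TerminalID() int      { return t.terminalID }
    func (t *staticToken) Lexeme() []byte       { return t.lexeme }
    func (t *staticToken) EOF() bool            { return t.eof }
    func (t *staticToken) Invalid() bool        { return false }
    func (t *staticToken) Position() (int, int) { return t.row, t.col }
    func (t *staticToken) Skip() bool           { return false }

    // staticStream replays a fixed token sequence, then reports EOF.
    type staticStream struct {
        toks []*staticToken
        pos  int
    }

    func (s *staticStream) Next() (Token, error) {
        if s.pos >= len(s.toks) {
            return &staticToken{eof: true}, nil
        }
        tok := s.toks[s.pos]
        s.pos++
        return tok, nil
    }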
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 69780bc..4e60f52 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -767,8 +767,13 @@ error: 'error' #skip;
t.Fatal(err)
}
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
treeAct := NewSyntaxTreeActionSet(gram, true, true)
- p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(treeAct))
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(treeAct))
if err != nil {
t.Fatal(err)
}
diff --git a/driver/semantic_action.go b/driver/semantic_action.go
index 8bcbd22..3023a36 100644
--- a/driver/semantic_action.go
+++ b/driver/semantic_action.go
@@ -4,14 +4,13 @@ import (
"fmt"
"io"
- mldriver "github.com/nihei9/maleeni/driver"
"github.com/nihei9/vartan/spec"
)
type SemanticActionSet interface {
// Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to
// the symbol. When the driver recovered from an error state by shifting the token, `recovered` is true.
- Shift(tok *mldriver.Token, recovered bool)
+ Shift(tok Token, recovered bool)
// Reduce runs when the driver reduces an RHS of a production to its LHS. `prodNum` is a number of
// the production. When the driver recovered from an error state by reducing the production,
@@ -26,10 +25,10 @@ type SemanticActionSet interface {
// from the state stack.
// Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
// corresponding to the error symbol doesn't exist.
- TrapAndShiftError(cause *mldriver.Token, popped int)
+ TrapAndShiftError(cause Token, popped int)
// MissError runs when the driver fails to trap a syntax error. `cause` is a token that caused a syntax error.
- MissError(cause *mldriver.Token)
+ MissError(cause Token)
}
var _ SemanticActionSet = &SyntaxTreeActionSet{}
@@ -95,25 +94,27 @@ func NewSyntaxTreeActionSet(gram *spec.CompiledGrammar, makeAST bool, makeCST bo
}
}
-func (a *SyntaxTreeActionSet) Shift(tok *mldriver.Token, recovered bool) {
+func (a *SyntaxTreeActionSet) Shift(tok Token, recovered bool) {
term := a.tokenToTerminal(tok)
var ast *Node
var cst *Node
if a.makeAST {
+ row, col := tok.Position()
ast = &Node{
KindName: a.gram.ParsingTable.Terminals[term],
- Text: string(tok.Lexeme),
- Row: tok.Row,
- Col: tok.Col,
+ Text: string(tok.Lexeme()),
+ Row: row,
+ Col: col,
}
}
if a.makeCST {
+ row, col := tok.Position()
cst = &Node{
KindName: a.gram.ParsingTable.Terminals[term],
- Text: string(tok.Lexeme),
- Row: tok.Row,
- Col: tok.Col,
+ Text: string(tok.Lexeme()),
+ Row: row,
+ Col: col,
}
}
@@ -205,7 +206,7 @@ func (a *SyntaxTreeActionSet) Accept() {
a.ast = top[0].ast
}
-func (a *SyntaxTreeActionSet) TrapAndShiftError(cause *mldriver.Token, popped int) {
+func (a *SyntaxTreeActionSet) TrapAndShiftError(cause Token, popped int) {
a.semStack.pop(popped)
errSym := a.gram.ParsingTable.ErrorSymbol
@@ -229,7 +230,7 @@ func (a *SyntaxTreeActionSet) TrapAndShiftError(cause *mldriver.Token, popped in
})
}
-func (a *SyntaxTreeActionSet) MissError(cause *mldriver.Token) {
+func (a *SyntaxTreeActionSet) MissError(cause Token) {
}
func (a *SyntaxTreeActionSet) CST() *Node {
@@ -240,12 +241,12 @@ func (a *SyntaxTreeActionSet) AST() *Node {
return a.ast
}
-func (a *SyntaxTreeActionSet) tokenToTerminal(tok *mldriver.Token) int {
- if tok.EOF {
+func (a *SyntaxTreeActionSet) tokenToTerminal(tok Token) int {
+ if tok.EOF() {
return a.gram.ParsingTable.EOFSymbol
}
- return a.gram.LexicalSpecification.Maleeni.KindToTerminal[tok.KindID]
+ return tok.TerminalID()
}
type semanticFrame struct {
diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go
index d3f15a8..9c66a85 100644
--- a/driver/semantic_action_test.go
+++ b/driver/semantic_action_test.go
@@ -5,7 +5,6 @@ import (
"strings"
"testing"
- mldriver "github.com/nihei9/maleeni/driver"
"github.com/nihei9/vartan/grammar"
"github.com/nihei9/vartan/spec"
)
@@ -15,12 +14,12 @@ type testSemAct struct {
actLog []string
}
-func (a *testSemAct) Shift(tok *mldriver.Token, recovered bool) {
- k := a.gram.LexicalSpecification.Maleeni.Spec.KindNames[tok.KindID]
+func (a *testSemAct) Shift(tok Token, recovered bool) {
+ t := a.gram.ParsingTable.Terminals[tok.TerminalID()]
if recovered {
- a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", k))
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
} else {
- a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", k))
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
}
}
@@ -38,11 +37,11 @@ func (a *testSemAct) Accept() {
a.actLog = append(a.actLog, "accept")
}
-func (a *testSemAct) TrapAndShiftError(cause *mldriver.Token, popped int) {
+func (a *testSemAct) TrapAndShiftError(cause Token, popped int) {
a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
}
-func (a *testSemAct) MissError(cause *mldriver.Token) {
+func (a *testSemAct) MissError(cause Token) {
a.actLog = append(a.actLog, "miss")
}
@@ -193,10 +192,15 @@ char: "[a-z]";
t.Fatal(err)
}
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
semAct := &testSemAct{
gram: gram,
}
- p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src), SemanticAction(semAct))
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
if err != nil {
t.Fatal(err)
}
diff --git a/driver/spec.go b/driver/spec.go
index 71935b4..a935577 100644
--- a/driver/spec.go
+++ b/driver/spec.go
@@ -1,84 +1,69 @@
package driver
-import (
- mldriver "github.com/nihei9/maleeni/driver"
- "github.com/nihei9/vartan/spec"
-)
+import "github.com/nihei9/vartan/spec"
-type gram struct {
+type grammarImpl struct {
g *spec.CompiledGrammar
}
-func NewGrammar(g *spec.CompiledGrammar) *gram {
- return &gram{
+func NewGrammar(g *spec.CompiledGrammar) *grammarImpl {
+ return &grammarImpl{
g: g,
}
}
-func (g *gram) LexicalSpecification() mldriver.LexSpec {
- return mldriver.NewLexSpec(g.g.LexicalSpecification.Maleeni.Spec)
-}
-
-func (g *gram) Class() string {
+func (g *grammarImpl) Class() string {
return g.g.ParsingTable.Class
}
-func (g *gram) InitialState() int {
+func (g *grammarImpl) InitialState() int {
return g.g.ParsingTable.InitialState
}
-func (g *gram) StartProduction() int {
+func (g *grammarImpl) StartProduction() int {
return g.g.ParsingTable.StartProduction
}
-func (g *gram) RecoverProduction(prod int) bool {
+func (g *grammarImpl) RecoverProduction(prod int) bool {
return g.g.ParsingTable.RecoverProductions[prod] != 0
}
-func (g *gram) Action(state int, terminal int) int {
+func (g *grammarImpl) Action(state int, terminal int) int {
return g.g.ParsingTable.Action[state*g.g.ParsingTable.TerminalCount+terminal]
}
-func (g *gram) GoTo(state int, lhs int) int {
+func (g *grammarImpl) GoTo(state int, lhs int) int {
return g.g.ParsingTable.GoTo[state*g.g.ParsingTable.NonTerminalCount+lhs]
}
-func (g *gram) AlternativeSymbolCount(prod int) int {
+func (g *grammarImpl) AlternativeSymbolCount(prod int) int {
return g.g.ParsingTable.AlternativeSymbolCounts[prod]
}
-func (g *gram) TerminalCount() int {
+func (g *grammarImpl) TerminalCount() int {
return g.g.ParsingTable.TerminalCount
}
-func (g *gram) ErrorTrapperState(state int) bool {
+func (g *grammarImpl) ErrorTrapperState(state int) bool {
return g.g.ParsingTable.ErrorTrapperStates[state] != 0
}
-func (g *gram) LHS(prod int) int {
+func (g *grammarImpl) LHS(prod int) int {
return g.g.ParsingTable.LHSSymbols[prod]
}
-func (g *gram) EOF() int {
+func (g *grammarImpl) EOF() int {
return g.g.ParsingTable.EOFSymbol
}
-func (g *gram) Error() int {
+func (g *grammarImpl) Error() int {
return g.g.ParsingTable.ErrorSymbol
}
-func (g *gram) Terminal(terminal int) string {
+func (g *grammarImpl) Terminal(terminal int) string {
return g.g.ParsingTable.Terminals[terminal]
}
-func (g *gram) TerminalAlias(terminal int) string {
+func (g *grammarImpl) TerminalAlias(terminal int) string {
return g.g.LexicalSpecification.Maleeni.KindAliases[terminal]
}
-
-func (g *gram) Skip(kind mldriver.KindID) bool {
- return g.g.LexicalSpecification.Maleeni.Skip[kind] > 0
-}
-
-func (g *gram) LexicalKindToTerminal(kind mldriver.KindID) int {
- return g.g.LexicalSpecification.Maleeni.KindToTerminal[kind]
-}
diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go
index 8feec91..a7f363f 100644
--- a/driver/syntax_error_test.go
+++ b/driver/syntax_error_test.go
@@ -119,7 +119,12 @@ c: 'c';
t.Fatal(err)
}
- p, err := NewParser(NewGrammar(gram), strings.NewReader(tt.src))
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram))
if err != nil {
t.Fatal(err)
}
diff --git a/driver/token_stream.go b/driver/token_stream.go
new file mode 100644
index 0000000..feb86ae
--- /dev/null
+++ b/driver/token_stream.go
@@ -0,0 +1,69 @@
+package driver
+
+import (
+ "io"
+
+ mldriver "github.com/nihei9/maleeni/driver"
+ "github.com/nihei9/vartan/spec"
+)
+
+type token struct {
+ terminalID int
+ skip bool
+ tok *mldriver.Token
+}
+
+func (t *token) TerminalID() int {
+ return t.terminalID
+}
+
+func (t *token) Lexeme() []byte {
+ return t.tok.Lexeme
+}
+
+func (t *token) EOF() bool {
+ return t.tok.EOF
+}
+
+func (t *token) Invalid() bool {
+ return t.tok.Invalid
+}
+
+func (t *token) Skip() bool {
+ return t.skip
+}
+
+func (t *token) Position() (int, int) {
+ return t.tok.Row, t.tok.Col
+}
+
+type tokenStream struct {
+ lex *mldriver.Lexer
+ kindToTerminal []int
+ skip []int
+}
+
+func NewTokenStream(g *spec.CompiledGrammar, src io.Reader) (TokenStream, error) {
+ lex, err := mldriver.NewLexer(mldriver.NewLexSpec(g.LexicalSpecification.Maleeni.Spec), src)
+ if err != nil {
+ return nil, err
+ }
+
+ return &tokenStream{
+ lex: lex,
+ kindToTerminal: g.LexicalSpecification.Maleeni.KindToTerminal,
+ skip: g.LexicalSpecification.Maleeni.Skip,
+ }, nil
+}
+
+func (l *tokenStream) Next() (Token, error) {
+ tok, err := l.lex.Next()
+ if err != nil {
+ return nil, err
+ }
+ return &token{
+ terminalID: l.kindToTerminal[tok.KindID],
+ skip: l.skip[tok.KindID] > 0,
+ tok: tok,
+ }, nil
+}
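
Putting the pieces together, an end-to-end caller now looks like the sketch
below. It is based on the updated call sites in cmd/vartan/parse.go; the
Parse and SyntaxErrors methods come from parser code outside this diff, so
their exact names here are assumptions:

    import (
        "fmt"
        "io"

        "github.com/nihei9/vartan/driver"
        "github.com/nihei9/vartan/spec"
    )

    func parseSource(cgram *spec.CompiledGrammar, src io.Reader) error {
        toks, err := driver.NewTokenStream(cgram, src)
        if err != nil {
            return err
        }
        p, err := driver.NewParser(toks, driver.NewGrammar(cgram))
        if err != nil {
            return err
        }
        // Parse and SyntaxErrors are assumed from the existing parser API.
        if err := p.Parse(); err != nil {
            return err
        }
        for _, synErr := range p.SyntaxErrors() {
            fmt.Printf("%v:%v: %v\n", synErr.Row+1, synErr.Col+1, synErr.Message)
        }
        return nil
    }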