author    Ryo Nihei <nihei.dev@gmail.com>  2022-03-27 02:32:22 +0900
committer Ryo Nihei <nihei.dev@gmail.com>  2022-03-27 20:25:04 +0900
commit    d0431e3a435e2ad3180d945f66098c04ed0faf22 (patch)
tree      2963de49b509e639153091cf259eae4cfc51814e
parent    Use a lexer via interface (diff)
download  urubu-d0431e3a435e2ad3180d945f66098c04ed0faf22.tar.gz
          urubu-d0431e3a435e2ad3180d945f66098c04ed0faf22.tar.xz
Add vartan-go command
-rw-r--r--  README.md                        279
-rw-r--r--  cmd/vartan-go/generate.go        153
-rw-r--r--  cmd/vartan-go/main.go             12
-rw-r--r--  cmd/vartan/parse.go               20
-rw-r--r--  driver/conflict_test.go            7
-rw-r--r--  driver/parser.go                  18
-rw-r--r--  driver/parser_test.go              7
-rw-r--r--  driver/semantic_action.go         42
-rw-r--r--  driver/semantic_action_test.go     6
-rw-r--r--  driver/spec.go                     8
-rw-r--r--  driver/template.go               554
-rw-r--r--  driver/token_stream.go            18
12 files changed, 1016 insertions(+), 108 deletions(-)
diff --git a/README.md b/README.md
index 679d677..a9f6d30 100644
--- a/README.md
+++ b/README.md
@@ -1,80 +1,111 @@
# vartan
-vartan provides a compiler that generates a LALR(1) or SLR(1) parsing table and a driver for golang.
+vartan is a parser generator for Go that supports LALR(1) and SLR(1) grammars. vartan also provides a command that performs syntax analysis, making it easy to debug your grammar.
[![Test](https://github.com/nihei9/vartan/actions/workflows/test.yml/badge.svg?branch=main)](https://github.com/nihei9/vartan/actions/workflows/test.yml)
-## Status
-
-🚧 Now Developing
-
## Installation
+Compiler:
+
```sh
$ go install github.com/nihei9/vartan/cmd/vartan@latest
```
+Code Generator:
+
+```sh
+$ go install github.com/nihei9/vartan/cmd/vartan-go@latest
+```
+
## Usage
+### 1. Define your grammar
+
vartan uses a BNF-like DSL to define your grammar. As an example, let's write a grammar for simple expressions.
```
+%name expr
+
+%left mul div
+%left add sub
+
expr
- : expr add_op term
- | term
- ;
-term
- : term mul_op factor
- | factor
- ;
-factor
- : number
- | id
- ;
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | func_call
+ | integer
+ | id
+ | '(' expr ')' #ast #(expr $2)
+ ;
+func_call
+ : id '(' args ')' #ast #(func_call $1 $3)
+ | id '(' ')' #ast #(func_call $1)
+ ;
+args
+ : args ',' expr #ast #(args $1... $3)
+ | expr
+ ;
whitespaces: "[\u{0009}\u{0020}]+" #skip;
-number: "[0-9]+";
-id: "[A-Za-z_]+";
-add_op: '+';
-mul_op: '*';
+integer: "0|[1-9][0-9]*";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+add: '+';
+sub: '-';
+mul: '*';
+div: '/';
```
Save the above grammar to a file. In this explanation, the file name is `expr.vr`.
+The `#ast` directives describe how to build an AST node from each alternative: `$1` and `$3` refer to RHS symbols, and `$1...` expands a node's children into its parent. `#skip` tells the lexer to discard the matched tokens.
+⚠️ The input file must be encoded in UTF-8.
+
+### 2. Compile the grammar
+
Next, generate a parsing table using `vartan compile` command.
```sh
$ vartan compile -g expr.vr -o expr.json
+16 conflicts
```
+
+The reported conflicts are expected here: the operator alternatives make `expr` ambiguous, and the `%left` directives resolve each conflict using precedence and associativity. Section 3.2 shows how to inspect them.
+### 3. Debug
+
+#### 3.1. Parse
+
If you want to make sure that the grammar behaves as expected, you can use the `vartan parse` command to try parsing without implementing a driver.
⚠️ The only encoding that the `vartan parse` command and the driver can handle is UTF-8.
```sh
-$ echo -n 'foo + bar * baz * 100' | vartan parse expr.json
+$ echo -n 'foo(10, bar(a)) + 99 * x' | vartan parse expr.json
expr
├─ expr
-│ └─ term
-│ └─ factor
-│ └─ id "foo"
-├─ add_op "+"
-└─ term
- ├─ term
- │ ├─ term
- │ │ └─ factor
- │ │ └─ id "bar"
- │ ├─ mul_op "*"
- │ └─ factor
- │ └─ id "baz"
- ├─ mul_op "*"
- └─ factor
- └─ number "100"
+│ └─ func_call
+│ ├─ id "foo"
+│ └─ args
+│ ├─ expr
+│ │ └─ integer "10"
+│ └─ expr
+│ └─ func_call
+│ ├─ id "bar"
+│ └─ args
+│ └─ expr
+│ └─ id "a"
+├─ add "+"
+└─ expr
+ ├─ expr
+ │ └─ integer "99"
+ ├─ mul "*"
+ └─ expr
+ └─ id "x"
```
When the `vartan parse` command successfully parses the input data, it prints a CST or an AST (if any).
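+
+By default an AST is printed. The `--cst` and `--only-parse` flags (defined in `cmd/vartan/parse.go` in this commit) switch the output. For example:
+
+```sh
+$ echo -n 'foo + 1' | vartan parse --cst expr.json        # print a CST instead of an AST
+$ echo -n 'foo + 1' | vartan parse --only-parse expr.json # parse only; no tree is built
+```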
-## Debug
+#### 3.2. Resolve conflicts
The `vartan compile` command also generates a description file, with the `-description.json` suffix, alongside the parsing table. This file describes each state in the parsing table in detail. If your grammar contains conflicts, see the `Conflicts` and `States` sections of this file. Using the `vartan show` command, you can view the description file in a readable format.
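+
+For example, with the description file generated in step 2 (it is named after the parsing table, so here it is assumed to be `expr-description.json`):
+
+```sh
+$ vartan show expr-description.json
+```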
@@ -86,27 +117,38 @@ LALR(1)
# Conflicts
-No conflict was detected.
+16 conflicts were detected.
# Terminals
1 - - <eof>
2 - - error
- 3 - - whitespaces
- 4 - - number
- 5 - - id
- 6 - - add_op (+)
- 7 - - mul_op (*)
+ 3 - - x_1 (\()
+ 4 - - x_2 (\))
+ 5 - - x_3 (,)
+ 6 - - whitespaces
+ 7 - - integer
+ 8 - - id
+ 9 2 l add (+)
+ 10 2 l sub (-)
+ 11 1 l mul (*)
+ 12 1 l div (/)
# Productions
1 - - expr' → expr
- 2 - - expr → expr + term
- 3 - - expr → term
- 4 - - term → term * factor
- 5 - - term → factor
- 6 - - factor → number
- 7 - - factor → id
+ 2 2 l expr → expr + expr
+ 3 2 l expr → expr - expr
+ 4 1 l expr → expr * expr
+ 5 1 l expr → expr / expr
+ 6 - - expr → func_call
+ 7 - - expr → integer
+ 8 - - expr → id
+ 9 - - expr → \( expr \)
+ 10 - - func_call → id \( args \)
+ 11 - - func_call → id \( \)
+ 12 - - args → args , expr
+ 13 - - args → expr
# States
@@ -114,11 +156,144 @@ No conflict was detected.
1 expr' → ・ expr
-shift 4 on number
+shift 3 on \(
+shift 4 on integer
shift 5 on id
goto 1 on expr
-goto 2 on term
-goto 3 on factor
+goto 2 on func_call
+
+
+## State 1
+
+ 1 expr' → expr ・
+ 2 expr → expr ・ + expr
+ 3 expr → expr ・ - expr
+ 4 expr → expr ・ * expr
+ 5 expr → expr ・ / expr
+
+shift 6 on +
+shift 7 on -
+shift 8 on *
+shift 9 on /
+reduce 1 on <eof>
+
+
+## State 2
+
+ 6 expr → func_call ・
+
+reduce 6 on <eof>, \), ,, +, -, *, /
...
```
+
+### 4. Generate a parser
+
+Using the `vartan-go` command, you can generate the source code of a parser that recognizes your grammar.
+
+```sh
+$ vartan-go expr.json
+```
+
+Then you will get the following files.
+
+* `expr_parser.go`
+* `expr_lexer.go`
+* `expr_semantic_action.go`
+
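+vartan-go also accepts a `--package` (`-p`) flag that sets the package name of the generated sources; it defaults to `main`, which suits the driver below (see `cmd/vartan-go/generate.go` in this commit). For example:
+
+```sh
+$ vartan-go --package expr expr.json
+```
+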
+You need to implement a driver to use the parser. An example is below.
+
+```go
+package main
+
+import (
+ "fmt"
+ "io"
+ "os"
+)
+
+func main() {
+ toks, err := NewTokenStream(os.Stdin)
+ if err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+ gram := NewGrammar()
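+ // Build an AST (the second argument) but skip the CST (the third argument).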
+ treeAct := NewSyntaxTreeActionSet(gram, true, false)
+ p, err := NewParser(toks, gram, SemanticAction(treeAct))
+ if err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+ err = p.Parse()
+ if err != nil {
+ fmt.Println(err)
+ os.Exit(1)
+ }
+ synErrs := p.SyntaxErrors()
+ if len(synErrs) > 0 {
+ for _, synErr := range synErrs {
+ printSyntaxError(os.Stderr, synErr, gram)
+ }
+ os.Exit(1)
+ }
+ fmt.Println("accepted")
+ PrintTree(os.Stdout, treeAct.AST())
+}
+
+func printSyntaxError(w io.Writer, synErr *SyntaxError, gram Grammar) {
+ var msg string
+ tok := synErr.Token
+ switch {
+ case tok.EOF():
+ msg = "<eof>"
+ case tok.Invalid():
+ msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))
+ default:
+ if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" {
+ msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias)
+ } else {
+ msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID()))
+ }
+ }
+ fmt.Fprintf(w, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg)
+
+ if len(synErr.ExpectedTerminals) > 0 {
+ fmt.Fprintf(w, "; expected: %v", synErr.ExpectedTerminals[0])
+ for _, t := range synErr.ExpectedTerminals[1:] {
+ fmt.Fprintf(w, ", %v", t)
+ }
+ }
+
+ fmt.Fprintf(w, "\n")
+}
+```
+
+Please save the above source code to `main.go` and create a directory structure like the following.
+
+```
+/project_root
+├── expr_parser.go
+├── expr_lexer.go
+├── expr_semantic_action.go
+└── main.go (the driver you implemented)
+```
+
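+Note that `go run .` requires the directory to be a Go module. If it is not one yet, initialize it first (the module path below is just a placeholder):
+
+```sh
+$ go mod init example.com/expr
+```
+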
+Now you can perform the syntax analysis.
+
+```sh
+$ echo -n 'foo+99' | go run .
+accepted
+expr
+├─ expr
+│ └─ id "foo"
+├─ add "+"
+└─ expr
+ └─ integer "99"
+```
+
+```sh
+$ echo -n 'foo+99?' | go run .
+1:7: unexpected token: '?' (<invalid>); expected: <eof>, +, -, *, /
+exit status 1
+```
diff --git a/cmd/vartan-go/generate.go b/cmd/vartan-go/generate.go
new file mode 100644
index 0000000..27f7236
--- /dev/null
+++ b/cmd/vartan-go/generate.go
@@ -0,0 +1,153 @@
+package main
+
+import (
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "os"
+ "runtime/debug"
+
+ mldriver "github.com/nihei9/maleeni/driver"
+ "github.com/nihei9/vartan/driver"
+ "github.com/nihei9/vartan/spec"
+ "github.com/spf13/cobra"
+)
+
+func Execute() error {
+ err := generateCmd.Execute()
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "%v\n", err)
+ return err
+ }
+
+ return nil
+}
+
+var generateFlags = struct {
+ pkgName *string
+}{}
+
+var generateCmd = &cobra.Command{
+ Use: "vartan-go",
+ Short: "Generate a parser for Go",
+ Long: `vartan-go generates a parser for Go.`,
+ Example: ` vartan-go grammar.json`,
+ Args: cobra.ExactArgs(1),
+ RunE: runGenerate,
+ SilenceErrors: true,
+ SilenceUsage: true,
+}
+
+func init() {
+ generateFlags.pkgName = generateCmd.Flags().StringP("package", "p", "main", "package name")
+}
+
+func runGenerate(cmd *cobra.Command, args []string) (retErr error) {
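+ // Recover from panics and report them as errors; unexpected panics also print a stack trace.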
+ defer func() {
+ panicked := false
+ v := recover()
+ if v != nil {
+ err, ok := v.(error)
+ if !ok {
+ retErr = fmt.Errorf("an unexpected error occurred: %v", v)
+ fmt.Fprintf(os.Stderr, "%v:\n%v", retErr, string(debug.Stack()))
+ return
+ }
+
+ retErr = err
+ panicked = true
+ }
+
+ if retErr != nil {
+ if panicked {
+ fmt.Fprintf(os.Stderr, "%v:\n%v", retErr, string(debug.Stack()))
+ } else {
+ fmt.Fprintf(os.Stderr, "%v\n", retErr)
+ }
+ }
+ }()
+
+ cgram, err := readCompiledGrammar(args[0])
+ if err != nil {
+ return fmt.Errorf("Cannot read a compiled grammar: %w", err)
+ }
+
+ {
+ b, err := mldriver.GenLexer(cgram.LexicalSpecification.Maleeni.Spec, *generateFlags.pkgName)
+ if err != nil {
+ return fmt.Errorf("Failed to generate a lexer: %w", err)
+ }
+
+ filePath := fmt.Sprintf("%v_lexer.go", cgram.Name)
+
+ f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return fmt.Errorf("Failed to create an output file: %v", err)
+ }
+ defer f.Close()
+
+ _, err = f.Write(b)
+ if err != nil {
+ return fmt.Errorf("Failed to write lexer source code: %v", err)
+ }
+ }
+
+ {
+ b, err := driver.GenParser(cgram, *generateFlags.pkgName)
+ if err != nil {
+ return fmt.Errorf("Failed to generate a parser: %w", err)
+ }
+
+ filePath := fmt.Sprintf("%v_parser.go", cgram.Name)
+
+ f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return fmt.Errorf("Failed to create an output file: %v", err)
+ }
+ defer f.Close()
+
+ _, err = f.Write(b)
+ if err != nil {
+ return fmt.Errorf("Failed to write parser source code: %v", err)
+ }
+ }
+
+ {
+ b, err := driver.GenSemanticAction(*generateFlags.pkgName)
+ if err != nil {
+ return fmt.Errorf("Failed to generate a semantic action set: %w", err)
+ }
+
+ filePath := fmt.Sprintf("%v_semantic_action.go", cgram.Name)
+
+ f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
+ if err != nil {
+ return fmt.Errorf("Failed to create an output file: %v", err)
+ }
+ defer f.Close()
+
+ _, err = f.Write(b)
+ if err != nil {
+ return fmt.Errorf("Failed to write semantic action source code: %v", err)
+ }
+ }
+
+ return nil
+}
+
+func readCompiledGrammar(path string) (*spec.CompiledGrammar, error) {
+ f, err := os.Open(path)
+ if err != nil {
+ return nil, err
+ }
+ data, err := ioutil.ReadAll(f)
+ if err != nil {
+ return nil, err
+ }
+ cgram := &spec.CompiledGrammar{}
+ err = json.Unmarshal(data, cgram)
+ if err != nil {
+ return nil, err
+ }
+ return cgram, nil
+}
diff --git a/cmd/vartan-go/main.go b/cmd/vartan-go/main.go
new file mode 100644
index 0000000..701f02f
--- /dev/null
+++ b/cmd/vartan-go/main.go
@@ -0,0 +1,12 @@
+package main
+
+import (
+ "os"
+)
+
+func main() {
+ err := Execute()
+ if err != nil {
+ os.Exit(1)
+ }
+}
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index caf1cbd..9b15251 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -63,7 +63,7 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
return fmt.Errorf("You cannot enable --only-parse and --cst at the same time")
}
- cgram, err := readCompiledGrammar(args[0])
+ cg, err := readCompiledGrammar(args[0])
if err != nil {
return fmt.Errorf("Cannot read a compiled grammar: %w", err)
}
@@ -81,13 +81,15 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
src = f
}
+ gram := driver.NewGrammar(cg)
+
var opts []driver.ParserOption
{
switch {
case *parseFlags.cst:
- treeAct = driver.NewSyntaxTreeActionSet(cgram, false, true)
+ treeAct = driver.NewSyntaxTreeActionSet(gram, false, true)
case !*parseFlags.onlyParse:
- treeAct = driver.NewSyntaxTreeActionSet(cgram, true, false)
+ treeAct = driver.NewSyntaxTreeActionSet(gram, true, false)
}
if treeAct != nil {
opts = append(opts, driver.SemanticAction(treeAct))
@@ -98,12 +100,12 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
}
}
- toks, err := driver.NewTokenStream(cgram, src)
+ toks, err := driver.NewTokenStream(cg, src)
if err != nil {
return err
}
- p, err = driver.NewParser(toks, driver.NewGrammar(cgram), opts...)
+ p, err = driver.NewParser(toks, gram, opts...)
if err != nil {
return err
}
@@ -125,7 +127,7 @@ func runParse(cmd *cobra.Command, args []string) (retErr error) {
case tok.Invalid():
msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))
default:
- t := cgram.ParsingTable.Terminals[tok.TerminalID()]
+ t := cg.ParsingTable.Terminals[tok.TerminalID()]
msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), t)
}
@@ -161,10 +163,10 @@ func readCompiledGrammar(path string) (*spec.CompiledGrammar, error) {
if err != nil {
return nil, err
}
- cgram := &spec.CompiledGrammar{}
- err = json.Unmarshal(data, cgram)
+ cg := &spec.CompiledGrammar{}
+ err = json.Unmarshal(data, cg)
if err != nil {
return nil, err
}
- return cgram, nil
+ return cg, nil
}
diff --git a/driver/conflict_test.go b/driver/conflict_test.go
index c827107..f507d4f 100644
--- a/driver/conflict_test.go
+++ b/driver/conflict_test.go
@@ -344,18 +344,19 @@ assign: '=';
t.Fatal(err)
}
- gram, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassSLR))
+ cg, err := grammar.Compile(g, grammar.SpecifyClass(grammar.ClassSLR))
if err != nil {
t.Fatal(err)
}
- toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
if err != nil {
t.Fatal(err)
}
+ gram := NewGrammar(cg)
treeAct := NewSyntaxTreeActionSet(gram, false, true)
- p, err := NewParser(toks, NewGrammar(gram), SemanticAction(treeAct))
+ p, err := NewParser(toks, gram, SemanticAction(treeAct))
if err != nil {
t.Fatal(err)
}
diff --git a/driver/parser.go b/driver/parser.go
index b765982..4c7397f 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -32,6 +32,9 @@ type Grammar interface {
// RecoverProduction returns true when a production has the recover directive.
RecoverProduction(prod int) bool
+ // NonTerminal returns a string representation of a non-terminal symbol.
+ NonTerminal(nonTerminal int) string
+
// TerminalCount returns a terminal symbol count of grammar.
TerminalCount() int
@@ -46,9 +49,12 @@ type Grammar interface {
// TerminalAlias returns an alias for a terminal.
TerminalAlias(terminal int) string
+
+ // ASTAction returns the AST action entries of a production.
+ ASTAction(prod int) []int
}
-type Token interface {
+type VToken interface {
// TerminalID returns a terminal ID.
TerminalID() int
@@ -69,14 +75,14 @@ type Token interface {
}
type TokenStream interface {
- Next() (Token, error)
+ Next() (VToken, error)
}
type SyntaxError struct {
Row int
Col int
Message string
- Token Token
+ Token VToken
ExpectedTerminals []string
}
@@ -269,7 +275,7 @@ func (p *Parser) validateLookahead(term int) bool {
}
}
-func (p *Parser) nextToken() (Token, error) {
+func (p *Parser) nextToken() (VToken, error) {
for {
// We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
// and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
@@ -287,7 +293,7 @@ func (p *Parser) nextToken() (Token, error) {
}
}
-func (p *Parser) tokenToTerminal(tok Token) int {
+func (p *Parser) tokenToTerminal(tok VToken) int {
if tok.EOF() {
return p.gram.EOF()
}
@@ -295,7 +301,7 @@ func (p *Parser) tokenToTerminal(tok Token) int {
return tok.TerminalID()
}
-func (p *Parser) lookupAction(tok Token) int {
+func (p *Parser) lookupAction(tok VToken) int {
if !p.disableLAC {
term := p.tokenToTerminal(tok)
if !p.validateLookahead(term) {
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 4e60f52..89d0e79 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -762,18 +762,19 @@ error: 'error' #skip;
}
}
- gram, err := grammar.Compile(g, grammar.SpecifyClass(class))
+ cg, err := grammar.Compile(g, grammar.SpecifyClass(class))
if err != nil {
t.Fatal(err)
}
- toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
if err != nil {
t.Fatal(err)
}
+ gram := NewGrammar(cg)
treeAct := NewSyntaxTreeActionSet(gram, true, true)
- p, err := NewParser(toks, NewGrammar(gram), SemanticAction(treeAct))
+ p, err := NewParser(toks, gram, SemanticAction(treeAct))
if err != nil {
t.Fatal(err)
}
diff --git a/driver/semantic_action.go b/driver/semantic_action.go
index 3023a36..d88d5fa 100644
--- a/driver/semantic_action.go
+++ b/driver/semantic_action.go
@@ -3,14 +3,12 @@ package driver
import (
"fmt"
"io"
-
- "github.com/nihei9/vartan/spec"
)
type SemanticActionSet interface {
// Shift runs when the driver shifts a symbol onto the state stack. `tok` is a token corresponding to
// the symbol. When the driver recovered from an error state by shifting the token, `recovered` is true.
- Shift(tok Token, recovered bool)
+ Shift(tok VToken, recovered bool)
// Reduce runs when the driver reduces an RHS of a production to its LHS. `prodNum` is a number of
// the production. When the driver recovered from an error state by reducing the production,
@@ -25,10 +23,10 @@ type SemanticActionSet interface {
// from the state stack.
// Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
// corresponding to the error symbol doesn't exist.
- TrapAndShiftError(cause Token, popped int)
+ TrapAndShiftError(cause VToken, popped int)
// MissError runs when the driver fails to trap a syntax error. `cause` is a token that caused a syntax error.
- MissError(cause Token)
+ MissError(cause VToken)
}
var _ SemanticActionSet = &SyntaxTreeActionSet{}
@@ -77,7 +75,7 @@ func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix s
}
type SyntaxTreeActionSet struct {
- gram *spec.CompiledGrammar
+ gram Grammar
makeAST bool
makeCST bool
ast *Node
@@ -85,7 +83,7 @@ type SyntaxTreeActionSet struct {
semStack *semanticStack
}
-func NewSyntaxTreeActionSet(gram *spec.CompiledGrammar, makeAST bool, makeCST bool) *SyntaxTreeActionSet {
+func NewSyntaxTreeActionSet(gram Grammar, makeAST bool, makeCST bool) *SyntaxTreeActionSet {
return &SyntaxTreeActionSet{
gram: gram,
makeAST: makeAST,
@@ -94,7 +92,7 @@ func NewSyntaxTreeActionSet(gram *spec.CompiledGrammar, makeAST bool, makeCST bo
}
}
-func (a *SyntaxTreeActionSet) Shift(tok Token, recovered bool) {
+func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
term := a.tokenToTerminal(tok)
var ast *Node
@@ -102,7 +100,7 @@ func (a *SyntaxTreeActionSet) Shift(tok Token, recovered bool) {
if a.makeAST {
row, col := tok.Position()
ast = &Node{
- KindName: a.gram.ParsingTable.Terminals[term],
+ KindName: a.gram.Terminal(term),
Text: string(tok.Lexeme()),
Row: row,
Col: col,
@@ -111,7 +109,7 @@ func (a *SyntaxTreeActionSet) Shift(tok Token, recovered bool) {
if a.makeCST {
row, col := tok.Position()
cst = &Node{
- KindName: a.gram.ParsingTable.Terminals[term],
+ KindName: a.gram.Terminal(term),
Text: string(tok.Lexeme()),
Row: row,
Col: col,
@@ -125,16 +123,16 @@ func (a *SyntaxTreeActionSet) Shift(tok Token, recovered bool) {
}
func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
- lhs := a.gram.ParsingTable.LHSSymbols[prodNum]
+ lhs := a.gram.LHS(prodNum)
// When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
- n := a.gram.ParsingTable.AlternativeSymbolCounts[prodNum]
+ n := a.gram.AlternativeSymbolCount(prodNum)
handle := a.semStack.pop(n)
var ast *Node
var cst *Node
if a.makeAST {
- act := a.gram.ASTAction.Entries[prodNum]
+ act := a.gram.ASTAction(prodNum)
var children []*Node
if act != nil {
// Count the number of children in advance to avoid frequent growth in a slice for children.
@@ -177,7 +175,7 @@ func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
}
ast = &Node{
- KindName: a.gram.ParsingTable.NonTerminals[lhs],
+ KindName: a.gram.NonTerminal(lhs),
Children: children,
}
}
@@ -188,7 +186,7 @@ func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
}
cst = &Node{
- KindName: a.gram.ParsingTable.NonTerminals[lhs],
+ KindName: a.gram.NonTerminal(lhs),
Children: children,
}
}
@@ -206,21 +204,19 @@ func (a *SyntaxTreeActionSet) Accept() {
a.ast = top[0].ast
}
-func (a *SyntaxTreeActionSet) TrapAndShiftError(cause Token, popped int) {
+func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) {
a.semStack.pop(popped)
- errSym := a.gram.ParsingTable.ErrorSymbol
-
var ast *Node
var cst *Node
if a.makeAST {
ast = &Node{
- KindName: a.gram.ParsingTable.Terminals[errSym],
+ KindName: a.gram.Terminal(a.gram.Error()),
}
}
if a.makeCST {
cst = &Node{
- KindName: a.gram.ParsingTable.Terminals[errSym],
+ KindName: a.gram.Terminal(a.gram.Error()),
}
}
@@ -230,7 +226,7 @@ func (a *SyntaxTreeActionSet) TrapAndShiftError(cause Token, popped int) {
})
}
-func (a *SyntaxTreeActionSet) MissError(cause Token) {
+func (a *SyntaxTreeActionSet) MissError(cause VToken) {
}
func (a *SyntaxTreeActionSet) CST() *Node {
@@ -241,9 +237,9 @@ func (a *SyntaxTreeActionSet) AST() *Node {
return a.ast
}
-func (a *SyntaxTreeActionSet) tokenToTerminal(tok Token) int {
+func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
if tok.EOF() {
- return a.gram.ParsingTable.EOFSymbol
+ return a.gram.EOF()
}
return tok.TerminalID()
diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go
index 9c66a85..791b5f0 100644
--- a/driver/semantic_action_test.go
+++ b/driver/semantic_action_test.go
@@ -14,7 +14,7 @@ type testSemAct struct {
actLog []string
}
-func (a *testSemAct) Shift(tok Token, recovered bool) {
+func (a *testSemAct) Shift(tok VToken, recovered bool) {
t := a.gram.ParsingTable.Terminals[tok.TerminalID()]
if recovered {
a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
@@ -37,11 +37,11 @@ func (a *testSemAct) Accept() {
a.actLog = append(a.actLog, "accept")
}
-func (a *testSemAct) TrapAndShiftError(cause Token, popped int) {
+func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
}
-func (a *testSemAct) MissError(cause Token) {
+func (a *testSemAct) MissError(cause VToken) {
a.actLog = append(a.actLog, "miss")
}
diff --git a/driver/spec.go b/driver/spec.go
index a935577..6127e73 100644
--- a/driver/spec.go
+++ b/driver/spec.go
@@ -48,6 +48,10 @@ func (g *grammarImpl) ErrorTrapperState(state int) bool {
return g.g.ParsingTable.ErrorTrapperStates[state] != 0
}
+func (g *grammarImpl) NonTerminal(nonTerminal int) string {
+ return g.g.ParsingTable.NonTerminals[nonTerminal]
+}
+
func (g *grammarImpl) LHS(prod int) int {
return g.g.ParsingTable.LHSSymbols[prod]
}
@@ -67,3 +71,7 @@ func (g *grammarImpl) Terminal(terminal int) string {
func (g *grammarImpl) TerminalAlias(terminal int) string {
return g.g.LexicalSpecification.Maleeni.KindAliases[terminal]
}
+
+func (g *grammarImpl) ASTAction(prod int) []int {
+ return g.g.ASTAction.Entries[prod]
+}
diff --git a/driver/template.go b/driver/template.go
new file mode 100644
index 0000000..aa1fbd3
--- /dev/null
+++ b/driver/template.go
@@ -0,0 +1,554 @@
+package driver
+
+import (
+ "bytes"
+ _ "embed"
+ "fmt"
+ "go/ast"
+ "go/format"
+ "go/parser"
+ "go/token"
+ goToken "go/token"
+ "strconv"
+ "strings"
+ "text/template"
+
+ "github.com/nihei9/vartan/spec"
+)
+
+//go:embed parser.go
+var parserCoreSrc string
+
+//go:embed semantic_action.go
+var semActSrc string
+
+func GenParser(cgram *spec.CompiledGrammar, pkgName string) ([]byte, error) {
+ var parserSrc string
+ {
+ fset := goToken.NewFileSet()
+ f, err := parser.ParseFile(fset, "parser.go", parserCoreSrc, parser.ParseComments)
+ if err != nil {
+ return nil, err
+ }
+
+ var b strings.Builder
+ err = format.Node(&b, fset, f)
+ if err != nil {
+ return nil, err
+ }
+
+ parserSrc = b.String()
+ }
+
+ var grammarSrc string
+ {
+ t, err := template.New("").Funcs(genGrammarTemplateFuncs(cgram)).Parse(grammarSrcTmplate)
+ if err != nil {
+ return nil, err
+ }
+
+ var b strings.Builder
+ err = t.Execute(&b, map[string]interface{}{
+ "class": cgram.ParsingTable.Class,
+ "initialState": cgram.ParsingTable.InitialState,
+ "startProduction": cgram.ParsingTable.StartProduction,
+ "terminalCount": cgram.ParsingTable.TerminalCount,
+ "nonTerminalCount": cgram.ParsingTable.NonTerminalCount,
+ "eofSymbol": cgram.ParsingTable.EOFSymbol,
+ "errorSymbol": cgram.ParsingTable.ErrorSymbol,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ grammarSrc = b.String()
+ }
+
+ var lexerSrc string
+ {
+ t, err := template.New("").Funcs(genLexerTemplateFuncs(cgram)).Parse(lexerSrcTmplate)
+ if err != nil {
+ return nil, err
+ }
+
+ var b strings.Builder
+ err = t.Execute(&b, nil)
+ if err != nil {
+ return nil, err
+ }
+
+ lexerSrc = b.String()
+ }
+
+ var src string
+ {
+ tmpl := `// Code generated by vartan-go. DO NOT EDIT.
+{{ .parserSrc }}
+
+{{ .grammarSrc }}
+
+{{ .lexerSrc }}
+`
+ t, err := template.New("").Parse(tmpl)
+ if err != nil {
+ return nil, err
+ }
+
+ var b strings.Builder
+ err = t.Execute(&b, map[string]string{
+ "parserSrc": parserSrc,
+ "grammarSrc": grammarSrc,
+ "lexerSrc": lexerSrc,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ src = b.String()
+ }
+
+ fset := goToken.NewFileSet()
+ f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
+ if err != nil {
+ return nil, err
+ }
+
+ f.Name = ast.NewIdent(pkgName)
+
+ // Complete an import statement.
+ for _, d := range f.Decls {
+ gd, ok := d.(*ast.GenDecl)
+ if !ok || gd.Tok != token.IMPORT {
+ continue
+ }
+ gd.Specs = append(gd.Specs, &ast.ImportSpec{
+ Path: &ast.BasicLit{
+ Value: `"io"`,
+ },
+ })
+ break
+ }
+
+ var b bytes.Buffer
+ err = format.Node(&b, fset, f)
+ if err != nil {
+ return nil, err
+ }
+
+ return b.Bytes(), nil
+}
+
+const grammarSrcTmplate = `
+type grammarImpl struct {
+ recoverProductions []int
+ action []int
+ goTo []int
+ alternativeSymbolCounts []int
+ errorTrapperStates []int
+ nonTerminals []string
+ lhsSymbols []int
+ terminals []string
+ terminalAliases []string
+ astActions [][]int
+}
+
+func NewGrammar() *grammarImpl {
+ return &grammarImpl{
+ recoverProductions: {{ genRecoverProductions }},
+ action: {{ genAction }},
+ goTo: {{ genGoTo }},
+ alternativeSymbolCounts: {{ genAlternativeSymbolCounts }},
+ errorTrapperStates: {{ genErrorTrapperStates }},
+ nonTerminals: {{ genNonTerminals }},
+ lhsSymbols: {{ genLHSSymbols }},
+ terminals: {{ genTerminals }},
+ terminalAliases: {{ genTerminalAliases }},
+ astActions: {{ genASTActions }},
+ }
+}
+
+func (g *grammarImpl) Class() string {
+ return "{{ .class }}"
+}
+
+func (g *grammarImpl) InitialState() int {
+ return {{ .initialState }}
+}
+
+func (g *grammarImpl) StartProduction() int {
+ return {{ .startProduction }}
+}
+
+func (g *grammarImpl) RecoverProduction(prod int) bool {
+ return g.recoverProductions[prod] != 0
+}
+
+func (g *grammarImpl) Action(state int, terminal int) int {
+ return g.action[state*{{ .terminalCount }}+terminal]
+}
+
+func (g *grammarImpl) GoTo(state int, lhs int) int {
+ return g.goTo[state*{{ .nonTerminalCount }}+lhs]
+}
+
+func (g *grammarImpl) AlternativeSymbolCount(prod int) int {
+ return g.alternativeSymbolCounts[prod]
+}
+
+func (g *grammarImpl) TerminalCount() int {
+ return {{ .terminalCount }}
+}
+
+func (g *grammarImpl) ErrorTrapperState(state int) bool {
+ return g.errorTrapperStates[state] != 0
+}
+
+func (g *grammarImpl) NonTerminal(nonTerminal int) string {
+ return g.nonTerminals[nonTerminal]
+}
+
+func (g *grammarImpl) LHS(prod int) int {
+ return g.lhsSymbols[prod]
+}
+
+func (g *grammarImpl) EOF() int {
+ return {{ .eofSymbol }}
+}
+
+func (g *grammarImpl) Error() int {
+ return {{ .errorSymbol }}
+}
+
+func (g *grammarImpl) Terminal(terminal int) string {
+ return g.terminals[terminal]
+}
+
+func (g *grammarImpl) TerminalAlias(terminal int) string {
+ return g.terminalAliases[terminal]
+}
+
+func (g *grammarImpl) ASTAction(prod int) []int {
+ return g.astActions[prod]
+}
+`
+
+func genGrammarTemplateFuncs(cgram *spec.CompiledGrammar) template.FuncMap {
+ return template.FuncMap{
+ "genRecoverProductions": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
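+ // Emit 20 elements per line to keep the generated slice literal readable.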
+ c := 1
+ for _, v := range cgram.ParsingTable.RecoverProductions {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genAction": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.ParsingTable.Action {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genGoTo": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.ParsingTable.GoTo {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genAlternativeSymbolCounts": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.ParsingTable.AlternativeSymbolCounts {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genErrorTrapperStates": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.ParsingTable.ErrorTrapperStates {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genNonTerminals": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]string{\n")
+ for _, v := range cgram.ParsingTable.NonTerminals {
+ fmt.Fprintf(&b, "%v,\n", strconv.Quote(v))
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genLHSSymbols": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.ParsingTable.LHSSymbols {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genTerminals": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]string{\n")
+ for _, v := range cgram.ParsingTable.Terminals {
+ fmt.Fprintf(&b, "%v,\n", strconv.Quote(v))
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genTerminalAliases": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]string{\n")
+ for _, v := range cgram.LexicalSpecification.Maleeni.KindAliases {
+ fmt.Fprintf(&b, "%v,\n", strconv.Quote(v))
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genASTActions": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[][]int{\n")
+ for _, entries := range cgram.ASTAction.Entries {
+ if len(entries) == 0 {
+ fmt.Fprintf(&b, "nil,\n")
+ continue
+ }
+
+ fmt.Fprintf(&b, "{\n")
+ c := 1
+ for _, v := range entries {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "},\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ }
+}
+
+const lexerSrcTmplate = `
+type vToken struct {
+ terminalID int
+ skip bool
+ tok *Token
+}
+
+func (t *vToken) TerminalID() int {
+ return t.terminalID
+}
+
+func (t *vToken) Lexeme() []byte {
+ return t.tok.Lexeme
+}
+
+func (t *vToken) EOF() bool {
+ return t.tok.EOF
+}
+
+func (t *vToken) Invalid() bool {
+ return t.tok.Invalid
+}
+
+func (t *vToken) Skip() bool {
+ return t.skip
+}
+
+func (t *vToken) Position() (int, int) {
+ return t.tok.Row, t.tok.Col
+}
+
+var kindToTerminal = {{ genKindToTerminal }}
+
+var skip = {{ genSkip }}
+
+type tokenStream struct {
+ lex *Lexer
+ kindToTerminal []int
+ skip []int
+}
+
+func NewTokenStream(src io.Reader) (*tokenStream, error) {
+ lex, err := NewLexer(NewLexSpec(), src)
+ if err != nil {
+ return nil, err
+ }
+
+ return &tokenStream{
+ lex: lex,
+ }, nil
+}
+
+func (t *tokenStream) Next() (VToken, error) {
+ tok, err := t.lex.Next()
+ if err != nil {
+ return nil, err
+ }
+ return &vToken{
+ terminalID: kindToTerminal[tok.KindID],
+ skip: skip[tok.KindID] > 0,
+ tok: tok,
+ }, nil
+}
+`
+
+func genLexerTemplateFuncs(cgram *spec.CompiledGrammar) template.FuncMap {
+ return template.FuncMap{
+ "genKindToTerminal": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.LexicalSpecification.Maleeni.KindToTerminal {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ "genSkip": func() string {
+ var b strings.Builder
+ fmt.Fprintf(&b, "[]int{\n")
+ c := 1
+ for _, v := range cgram.LexicalSpecification.Maleeni.Skip {
+ fmt.Fprintf(&b, "%v, ", v)
+ if c == 20 {
+ fmt.Fprintf(&b, "\n")
+ c = 1
+ } else {
+ c++
+ }
+ }
+ if c > 1 {
+ fmt.Fprintf(&b, "\n")
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+ },
+ }
+}
+
+func GenSemanticAction(pkgName string) ([]byte, error) {
+ var src string
+ {
+ tmpl := `// Code generated by vartan-go. DO NOT EDIT.
+{{ .semActSrc }}
+`
+ t, err := template.New("").Parse(tmpl)
+ if err != nil {
+ return nil, err
+ }
+
+ var b strings.Builder
+ err = t.Execute(&b, map[string]string{
+ "semActSrc": semActSrc,
+ })
+ if err != nil {
+ return nil, err
+ }
+
+ src = b.String()
+ }
+
+ fset := goToken.NewFileSet()
+ f, err := parser.ParseFile(fset, "", src, parser.ParseComments)
+ if err != nil {
+ return nil, err
+ }
+
+ f.Name = ast.NewIdent(pkgName)
+
+ var b bytes.Buffer
+ err = format.Node(&b, fset, f)
+ if err != nil {
+ return nil, err
+ }
+
+ return b.Bytes(), nil
+}
diff --git a/driver/token_stream.go b/driver/token_stream.go
index feb86ae..97c9f1f 100644
--- a/driver/token_stream.go
+++ b/driver/token_stream.go
@@ -7,33 +7,33 @@ import (
"github.com/nihei9/vartan/spec"
)
-type token struct {
+type vToken struct {
terminalID int
skip bool
tok *mldriver.Token
}
-func (t *token) TerminalID() int {
+func (t *vToken) TerminalID() int {
return t.terminalID
}
-func (t *token) Lexeme() []byte {
+func (t *vToken) Lexeme() []byte {
return t.tok.Lexeme
}
-func (t *token) EOF() bool {
+func (t *vToken) EOF() bool {
return t.tok.EOF
}
-func (t *token) Invalid() bool {
+func (t *vToken) Invalid() bool {
return t.tok.Invalid
}
-func (t *token) Skip() bool {
+func (t *vToken) Skip() bool {
return t.skip
}
-func (t *token) Position() (int, int) {
+func (t *vToken) Position() (int, int) {
return t.tok.Row, t.tok.Col
}
@@ -56,12 +56,12 @@ func NewTokenStream(g *spec.CompiledGrammar, src io.Reader) (TokenStream, error)
}, nil
}
-func (l *tokenStream) Next() (Token, error) {
+func (l *tokenStream) Next() (VToken, error) {
tok, err := l.lex.Next()
if err != nil {
return nil, err
}
- return &token{
+ return &vToken{
terminalID: l.kindToTerminal[tok.KindID],
skip: l.skip[tok.KindID] > 0,
tok: tok,