diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-27 02:32:22 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-27 20:25:04 +0900 |
commit | d0431e3a435e2ad3180d945f66098c04ed0faf22 (patch) | |
tree | 2963de49b509e639153091cf259eae4cfc51814e /README.md | |
parent | Use a lexer via interface (diff) | |
download | cotia-d0431e3a435e2ad3180d945f66098c04ed0faf22.tar.gz cotia-d0431e3a435e2ad3180d945f66098c04ed0faf22.tar.xz |
Add vartan-go command
Diffstat (limited to 'README.md')
-rw-r--r-- | README.md | 279 |
1 files changed, 227 insertions, 52 deletions
@@ -1,80 +1,111 @@ # vartan -vartan provides a compiler that generates a LALR(1) or SLR(1) parsing table and a driver for golang. +vartan is a parser generator for golang and supports LALR(1) and SLR(1). vartan also provides a command to perform syntax analysis to allow easy debugging of your grammar. [](https://github.com/nihei9/vartan/actions/workflows/test.yml) -## Status - -🚧 Now Developing - ## Installation +Compiler: + ```sh $ go install github.com/nihei9/vartan/cmd/vartan@latest ``` +Code Generator: + +```sh +$ go install github.com/nihei9/vartan/cmd/vartan-go@latest +``` + ## Usage +### 1. Define your grammar + vartan uses BNF-like DSL to define your grammar. As an example, let's write a grammar that represents a simple expression. ``` +%name expr + +%left mul div +%left add sub + expr - : expr add_op term - | term - ; -term - : term mul_op factor - | factor - ; -factor - : number - | id - ; + : expr add expr + | expr sub expr + | expr mul expr + | expr div expr + | func_call + | integer + | id + | '(' expr ')' #ast #(expr $2) + ; +func_call + : id '(' args ')' #ast #(func_call $1 $3) + | id '(' ')' #ast #(func_call $1) + ; +args + : args ',' expr #ast #(args $1... $3) + | expr + ; whitespaces: "[\u{0009}\u{0020}]+" #skip; -number: "[0-9]+"; -id: "[A-Za-z_]+"; -add_op: '+'; -mul_op: '*'; +integer: "0|[1-9][0-9]*"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +add: '+'; +sub: '-'; +mul: '*'; +div: '/'; ``` Save the above grammar to a file in UTF-8. In this explanation, the file name is `expr.vr`. +⚠️ The input file must be encoded in UTF-8. + +### 2. Compile the grammar + Next, generate a parsing table using `vartan compile` command. ```sh $ vartan compile -g expr.vr -o expr.json +16 conflicts ``` +### 3. Debug + +#### 3.1. Parse + If you want to make sure that the grammar behaves as expected, you can use `vartan parse` command to try parse without implementing a driver. ⚠️ An encoding that `vartan parse` command and the driver can handle is only UTF-8. ```sh -$ echo -n 'foo + bar * baz * 100' | vartan parse expr.json +$ echo -n 'foo(10, bar(a)) + 99 * x' | vartan parse expr.json expr ├─ expr -│ └─ term -│ └─ factor -│ └─ id "foo" -├─ add_op "+" -└─ term - ├─ term - │ ├─ term - │ │ └─ factor - │ │ └─ id "bar" - │ ├─ mul_op "*" - │ └─ factor - │ └─ id "baz" - ├─ mul_op "*" - └─ factor - └─ number "100" +│ └─ func_call +│ ├─ id "foo" +│ └─ args +│ ├─ expr +│ │ └─ integer "10" +│ └─ expr +│ └─ func_call +│ ├─ id "bar" +│ └─ args +│ └─ expr +│ └─ id "a" +├─ add "+" +└─ expr + ├─ expr + │ └─ integer "99" + ├─ mul "*" + └─ expr + └─ id "x" ``` When `vartan parse` command successfully parses the input data, it prints a CST or an AST (if any). -## Debug +#### 3.2. Resolve conflicts `vartan compile` command also generates a description file having `-description.json` suffix along with a parsing table. This file describes each state in the parsing table in detail. If your grammar contains conflicts, see `Conflicts` and `States` sections of this file. Using `vartan show` command, you can see the description file in a readable format. @@ -86,27 +117,38 @@ LALR(1) # Conflicts -No conflict was detected. +16 conflicts were detected. # Terminals 1 - - <eof> 2 - - error - 3 - - whitespaces - 4 - - number - 5 - - id - 6 - - add_op (+) - 7 - - mul_op (*) + 3 - - x_1 (\() + 4 - - x_2 (\)) + 5 - - x_3 (,) + 6 - - whitespaces + 7 - - integer + 8 - - id + 9 2 l add (+) + 10 2 l sub (-) + 11 1 l mul (*) + 12 1 l div (/) # Productions 1 - - expr' → expr - 2 - - expr → expr + term - 3 - - expr → term - 4 - - term → term * factor - 5 - - term → factor - 6 - - factor → number - 7 - - factor → id + 2 2 l expr → expr + expr + 3 2 l expr → expr - expr + 4 1 l expr → expr * expr + 5 1 l expr → expr / expr + 6 - - expr → func_call + 7 - - expr → integer + 8 - - expr → id + 9 - - expr → \( expr \) + 10 - - func_call → id \( args \) + 11 - - func_call → id \( \) + 12 - - args → args , expr + 13 - - args → expr # States @@ -114,11 +156,144 @@ No conflict was detected. 1 expr' → ・ expr -shift 4 on number +shift 3 on \( +shift 4 on integer shift 5 on id goto 1 on expr -goto 2 on term -goto 3 on factor +goto 2 on func_call + + +## State 1 + + 1 expr' → expr ・ + 2 expr → expr ・ + expr + 3 expr → expr ・ - expr + 4 expr → expr ・ * expr + 5 expr → expr ・ / expr + +shift 6 on + +shift 7 on - +shift 8 on * +shift 9 on / +reduce 1 on <eof> + + +## State 2 + + 6 expr → func_call ・ + +reduce 6 on <eof>, \), ,, +, -, *, / ... ``` + +### 4. Generate a parser + +Using `vartan-go` command, you can generate a source code of a parser to recognize your grammar. + +```sh +$ vartan-go expr.json +``` + +Then you will get the following files. + +* `expr_parser.go` +* `expr_lexer.go` +* `expr_semantic_action.go` + +You need to implement a driver to use the parser. An example is below. + +```go +package main + +import ( + "fmt" + "io" + "os" +) + +func main() { + toks, err := NewTokenStream(os.Stdin) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + gram := NewGrammar() + treeAct := NewSyntaxTreeActionSet(gram, true, false) + p, err := NewParser(toks, gram, SemanticAction(treeAct)) + if err != nil { + fmt.Println(err) + os.Exit(1) + } + err = p.Parse() + if err != nil { + fmt.Println(err) + os.Exit(1) + } + synErrs := p.SyntaxErrors() + if len(synErrs) > 0 { + for _, synErr := range synErrs { + printSyntaxError(os.Stderr, synErr, gram) + } + os.Exit(1) + } + fmt.Println("accepted") + PrintTree(os.Stdout, treeAct.AST()) +} + +func printSyntaxError(w io.Writer, synErr *SyntaxError, gram Grammar) { + var msg string + tok := synErr.Token + switch { + case tok.EOF(): + msg = "<eof>" + case tok.Invalid(): + msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())) + default: + if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" { + msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias) + } else { + msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID())) + } + } + fmt.Fprintf(w, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg) + + if len(synErr.ExpectedTerminals) > 0 { + fmt.Fprintf(w, "; expected: %v", synErr.ExpectedTerminals[0]) + for _, t := range synErr.ExpectedTerminals[1:] { + fmt.Fprintf(w, ", %v", t) + } + } + + fmt.Fprintf(w, "\n") +} +``` + +Please save the above source code to `main.go` and create a directory structure like the following. + +``` +/project_root +├── expr_parser.go +├── expr_lexer.go +├── expr_semantic_action.go +└── main.go (the driver you implemented) +``` + +Now, you can perform the syntax analysis. + +```sh +$ echo -n 'foo+99' | go run . +accepted +expr +├─ expr +│ └─ id "foo" +├─ add "+" +└─ expr + └─ integer "99" +``` + +```sh +$ echo -n 'foo+99?' | go run . +1:7: unexpected token: '?' (<invalid>); expected: <eof>, +, -, *, / +exit status 1 +``` |