diff options
-rw-r--r-- | README.md | 32 | ||||
-rw-r--r-- | cmd/vartan/parse.go | 17 | ||||
-rw-r--r-- | driver/parser_test.go | 20 | ||||
-rw-r--r-- | driver/semantic_action.go | 22 | ||||
-rw-r--r-- | driver/syntax_error_test.go | 170 | ||||
-rw-r--r-- | grammar/grammar.go | 12 | ||||
-rw-r--r-- | spec/grammar/grammar.go | 1 | ||||
-rw-r--r-- | spec/test/parser.go | 18 |
8 files changed, 249 insertions, 43 deletions
@@ -123,7 +123,7 @@ $ vartan show expr-report.json ### 4. Test -`vartan-test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows. +`vartan test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows. ``` This is an example. @@ -144,11 +144,11 @@ The test case consists of a description, an input text, and a syntax tree you ex Save the above test case to `test.txt` file and run the following command. ```sh -$ vartan-test expr.vartan test.txt +$ vartan test expr.vartan test.txt Passed test.txt ``` -When you specify a directory as the 2nd argument of `vartan-test` command, it will run all test cases in the directory. +When you specify a directory as the 2nd argument of `vartan test` command, it will run all test cases in the directory. ### 5. Generate a parser @@ -213,16 +213,20 @@ func printSyntaxError(w io.Writer, synErr *SyntaxError, gram Grammar) { case tok.Invalid(): msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())) default: - if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" { - msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias) + if term := gram.Terminal(tok.TerminalID()); term != "" { + if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" { + msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias) + } else { + msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term) + } } else { - msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID())) + msg = fmt.Sprintf("'%v'", string(tok.Lexeme())) } } fmt.Fprintf(w, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg) if len(synErr.ExpectedTerminals) > 0 { - fmt.Fprintf(w, "; expected: %v", synErr.ExpectedTerminals[0]) + fmt.Fprintf(w, ": expected: %v", synErr.ExpectedTerminals[0]) for _, t := range synErr.ExpectedTerminals[1:] { fmt.Fprintf(w, ", %v", t) } @@ -257,7 +261,7 @@ expr ```sh $ echo -n 'foo+99?' | go run . -1:7: unexpected token: '?' (<invalid>); expected: <eof>, +, -, *, / +1:7: unexpected token: '?' (<invalid>): expected: <eof>, +, -, *, / exit status 1 ``` @@ -714,16 +718,16 @@ In the following example, you can see the parser print syntax error messages and ``` $ echo -n 'x; x =; x = 1;' | vartan parse example.json -1:2: unexpected token: ';' (x_2); expected: = -1:7: unexpected token: ';' (x_2); expected: int +1:2: unexpected token: ';': expected: = +1:7: unexpected token: ';': expected: int statements ├─ statement -│ ├─ !error -│ └─ x_2 ";" +│ ├─ error +│ └─ <anonymous> ";" ├─ statement -│ ├─ !error -│ └─ x_2 ";" +│ ├─ error +│ └─ <anonymous> ";" └─ statement ├─ name "x" └─ int "1" diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go index d1fc80e..3f895c9 100644 --- a/cmd/vartan/parse.go +++ b/cmd/vartan/parse.go @@ -112,14 +112,7 @@ func runParse(cmd *cobra.Command, args []string) error { if !*parseFlags.onlyParse { // A parser can construct a parse tree even if syntax errors occur. // When therer is a parse tree, print it. - - var tree *driver.Node - if *parseFlags.cst { - tree = tb.Tree() - } else { - tree = tb.Tree() - } - if tree != nil { + if tree := tb.Tree(); tree != nil { switch *parseFlags.format { case "tree": b := tester.ConvertSyntaxTreeToTestableTree(tree).Format() @@ -179,10 +172,14 @@ func writeSyntaxErrorMessage(b *strings.Builder, cgram *spec.CompiledGrammar, sy case tok.Invalid(): fmt.Fprintf(b, "'%v' (<invalid>)", string(tok.Lexeme())) default: - fmt.Fprintf(b, "'%v' (%v)", string(tok.Lexeme()), cgram.ParsingTable.Terminals[tok.TerminalID()]) + if kind := cgram.ParsingTable.Terminals[tok.TerminalID()]; kind != "" { + fmt.Fprintf(b, "'%v' (%v)", string(tok.Lexeme()), kind) + } else { + fmt.Fprintf(b, "'%v'", string(tok.Lexeme())) + } } - fmt.Fprintf(b, "; expected: %v", synErr.ExpectedTerminals[0]) + fmt.Fprintf(b, ": expected: %v", synErr.ExpectedTerminals[0]) for _, t := range synErr.ExpectedTerminals[1:] { fmt.Fprintf(b, ", %v", t) } diff --git a/driver/parser_test.go b/driver/parser_test.go index 5c7addd..9e232f7 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -18,6 +18,10 @@ func termNode(kind string, text string, children ...*Node) *Node { } } +func anonTermNode(text string, children ...*Node) *Node { + return termNode("", text, children...) +} + func errorNode() *Node { return &Node{ Type: NodeTypeError, @@ -65,7 +69,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; nonTermNode("term", nonTermNode("term", nonTermNode("factor", - termNode("x_3", "("), + anonTermNode("("), nonTermNode("expr", nonTermNode("expr", nonTermNode("term", @@ -74,10 +78,10 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; ), ), ), - termNode("x_1", "+"), + anonTermNode("+"), nonTermNode("term", nonTermNode("factor", - termNode("x_3", "("), + anonTermNode("("), nonTermNode("expr", nonTermNode("expr", nonTermNode("term", @@ -86,27 +90,27 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; ), ), ), - termNode("x_1", "+"), + anonTermNode("+"), nonTermNode("term", nonTermNode("factor", termNode("id", "c"), ), ), ), - termNode("x_4", ")"), + anonTermNode(")"), ), ), ), - termNode("x_4", ")"), + anonTermNode(")"), ), ), - termNode("x_2", "*"), + anonTermNode("*"), nonTermNode("factor", termNode("id", "d"), ), ), ), - termNode("x_1", "+"), + anonTermNode("+"), nonTermNode("term", nonTermNode("factor", termNode("id", "e"), diff --git a/driver/semantic_action.go b/driver/semantic_action.go index 73f3bb0..54d3291 100644 --- a/driver/semantic_action.go +++ b/driver/semantic_action.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "io" + "strconv" ) // SemanticActionSet is a set of semantic actions a parser calls. @@ -270,6 +271,19 @@ func (n *Node) MarshalJSON() ([]byte, error) { KindName: n.KindName, }) case NodeTypeTerminal: + if n.KindName == "" { + return json.Marshal(struct { + Type NodeType `json:"type"` + Text string `json:"text"` + Row int `json:"row"` + Col int `json:"col"` + }{ + Type: n.Type, + Text: n.Text, + Row: n.Row, + Col: n.Col, + }) + } return json.Marshal(struct { Type NodeType `json:"type"` KindName string `json:"kind_name"` @@ -324,9 +338,13 @@ func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix s switch node.Type { case NodeTypeError: - fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName) + fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) case NodeTypeTerminal: - fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text) + if node.KindName == "" { + fmt.Fprintf(w, "%v<anonymous> %v\n", ruledLine, strconv.Quote(node.Text)) + } else { + fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text)) + } case NodeTypeNonTerminal: fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go index 1480390..f68f595 100644 --- a/driver/syntax_error_test.go +++ b/driver/syntax_error_test.go @@ -2,6 +2,7 @@ package driver import ( "fmt" + "sort" "strings" "testing" @@ -153,3 +154,172 @@ c }) } } + +func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) { + tests := []struct { + caption string + specSrc string + src string + cause string + expected []string + }{ + { + caption: "the parser reports an expected lookahead symbol", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +`, + src: `bar`, + cause: `bar`, + expected: []string{ + "foo", + }, + }, + { + caption: "the parser reports expected lookahead symbols", + specSrc: ` +#name test; + +s + : foo + | bar + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + src: `baz`, + cause: `baz`, + expected: []string{ + "foo", + "bar", + }, + }, + { + caption: "the parser may report the EOF as an expected lookahead symbol", + specSrc: ` +#name test; + +s + : foo + ; + +foo + : 'foo'; +`, + src: `foobar`, + cause: `bar`, + expected: []string{ + "<eof>", + }, + }, + { + caption: "the parser may report the EOF and others as expected lookahead symbols", + specSrc: ` +#name test; + +s + : foo + | + ; + +foo + : 'foo'; +`, + src: `bar`, + cause: `bar`, + expected: []string{ + "foo", + "<eof>", + }, + }, + { + caption: "when an anonymous symbol is expected, an expected symbol list contains an alias of the anonymous symbol", + specSrc: ` +#name test; + +s + : foo 'bar' + ; + +foo + : 'foo'; +`, + src: `foobaz`, + cause: `baz`, + expected: []string{ + "bar", + }, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + ast, err := spec.Parse(strings.NewReader(tt.specSrc)) + if err != nil { + t.Fatal(err) + } + + b := grammar.GrammarBuilder{ + AST: ast, + } + g, err := b.Build() + if err != nil { + t.Fatal(err) + } + + gram, _, err := grammar.Compile(g) + if err != nil { + t.Fatal(err) + } + + toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + + p, err := NewParser(toks, NewGrammar(gram)) + if err != nil { + t.Fatal(err) + } + + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + synErrs := p.SyntaxErrors() + if synErrs == nil { + t.Fatalf("expected one syntax error, but it didn't occur") + } + if len(synErrs) != 1 { + t.Fatalf("too many syntax errors: %v errors", len(synErrs)) + } + synErr := synErrs[0] + if string(synErr.Token.Lexeme()) != tt.cause { + t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme())) + } + if len(synErr.ExpectedTerminals) != len(tt.expected) { + t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals) + } + sort.Slice(tt.expected, func(i, j int) bool { + return tt.expected[i] < tt.expected[j] + }) + sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool { + return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j] + }) + for i, e := range tt.expected { + if synErr.ExpectedTerminals[i] != e { + t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i]) + } + } + }) + } +} diff --git a/grammar/grammar.go b/grammar/grammar.go index 65d232d..80846d9 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -1317,12 +1317,10 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec } kind2Term := make([]int, len(lexSpec.KindNames)) - term2Kind := make([]int, gram.symbolTable.termNum.Int()) skip := make([]int, len(lexSpec.KindNames)) for i, k := range lexSpec.KindNames { if k == mlspec.LexKindNameNil { kind2Term[mlspec.LexKindIDNil] = symbolNil.num().Int() - term2Kind[symbolNil.num()] = mlspec.LexKindIDNil.Int() continue } @@ -1331,7 +1329,6 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k) } kind2Term[i] = sym.num().Int() - term2Kind[sym.num()] = i for _, sk := range gram.skipLexKinds { if k != sk { @@ -1342,10 +1339,16 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec } } - terms, err := gram.symbolTable.terminalTexts() + termTexts, err := gram.symbolTable.terminalTexts() if err != nil { return nil, nil, err } + terms := make([]string, len(termTexts)) + for i, t := range termTexts { + if !strings.HasPrefix(t, "x_") { + terms[i] = t + } + } kindAliases := make([]string, gram.symbolTable.termNum.Int()) for _, sym := range gram.symbolTable.terminalSymbols() { @@ -1440,7 +1443,6 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec Maleeni: &spec.Maleeni{ Spec: lexSpec, KindToTerminal: kind2Term, - TerminalToKind: term2Kind, Skip: skip, KindAliases: kindAliases, }, diff --git a/spec/grammar/grammar.go b/spec/grammar/grammar.go index 25266e5..af87504 100644 --- a/spec/grammar/grammar.go +++ b/spec/grammar/grammar.go @@ -17,7 +17,6 @@ type LexicalSpecification struct { type Maleeni struct { Spec *mlspec.CompiledLexSpec `json:"spec"` KindToTerminal []int `json:"kind_to_terminal"` - TerminalToKind []int `json:"terminal_to_kind"` Skip []int `json:"skip"` KindAliases []string `json:"kind_aliases"` } diff --git a/spec/test/parser.go b/spec/test/parser.go index 175c89e..483e553 100644 --- a/spec/test/parser.go +++ b/spec/test/parser.go @@ -68,7 +68,11 @@ func (t *Tree) format(buf *bytes.Buffer, depth int) { buf.WriteString(" ") } buf.WriteString("(") - buf.WriteString(t.Kind) + if t.Kind == "" { + buf.WriteString("<anonymous>") + } else { + buf.WriteString(t.Kind) + } if len(t.Children) > 0 { buf.WriteString("\n") for i, c := range t.Children { @@ -228,9 +232,17 @@ func formatSyntaxError(synErr *SyntaxError, gram Grammar) []byte { case tok.Invalid(): b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))) default: - b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID()))) + if term := gram.Terminal(tok.TerminalID()); term != "" { + if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" { + b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias)) + } else { + b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term)) + } + } else { + b.WriteString(fmt.Sprintf("'%v'", string(tok.Lexeme()))) + } } - b.WriteString(fmt.Sprintf("; expected: %v", synErr.ExpectedTerminals[0])) + b.WriteString(fmt.Sprintf(": expected: %v", synErr.ExpectedTerminals[0])) for _, t := range synErr.ExpectedTerminals[1:] { b.WriteString(fmt.Sprintf(", %v", t)) } |