aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md32
-rw-r--r--cmd/vartan/parse.go17
-rw-r--r--driver/parser_test.go20
-rw-r--r--driver/semantic_action.go22
-rw-r--r--driver/syntax_error_test.go170
-rw-r--r--grammar/grammar.go12
-rw-r--r--spec/grammar/grammar.go1
-rw-r--r--spec/test/parser.go18
8 files changed, 249 insertions, 43 deletions
diff --git a/README.md b/README.md
index 3d9e5f5..222ee62 100644
--- a/README.md
+++ b/README.md
@@ -123,7 +123,7 @@ $ vartan show expr-report.json
### 4. Test
-`vartan-test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows.
+`vartan test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows.
```
This is an example.
@@ -144,11 +144,11 @@ The test case consists of a description, an input text, and a syntax tree you ex
Save the above test case to `test.txt` file and run the following command.
```sh
-$ vartan-test expr.vartan test.txt
+$ vartan test expr.vartan test.txt
Passed test.txt
```
-When you specify a directory as the 2nd argument of `vartan-test` command, it will run all test cases in the directory.
+When you specify a directory as the 2nd argument of `vartan test` command, it will run all test cases in the directory.
### 5. Generate a parser
@@ -213,16 +213,20 @@ func printSyntaxError(w io.Writer, synErr *SyntaxError, gram Grammar) {
case tok.Invalid():
msg = fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))
default:
- if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" {
- msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias)
+ if term := gram.Terminal(tok.TerminalID()); term != "" {
+ if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" {
+ msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias)
+ } else {
+ msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term)
+ }
} else {
- msg = fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID()))
+ msg = fmt.Sprintf("'%v'", string(tok.Lexeme()))
}
}
fmt.Fprintf(w, "%v:%v: %v: %v", synErr.Row+1, synErr.Col+1, synErr.Message, msg)
if len(synErr.ExpectedTerminals) > 0 {
- fmt.Fprintf(w, "; expected: %v", synErr.ExpectedTerminals[0])
+ fmt.Fprintf(w, ": expected: %v", synErr.ExpectedTerminals[0])
for _, t := range synErr.ExpectedTerminals[1:] {
fmt.Fprintf(w, ", %v", t)
}
@@ -257,7 +261,7 @@ expr
```sh
$ echo -n 'foo+99?' | go run .
-1:7: unexpected token: '?' (<invalid>); expected: <eof>, +, -, *, /
+1:7: unexpected token: '?' (<invalid>): expected: <eof>, +, -, *, /
exit status 1
```
@@ -714,16 +718,16 @@ In the following example, you can see the parser print syntax error messages and
```
$ echo -n 'x; x =; x = 1;' | vartan parse example.json
-1:2: unexpected token: ';' (x_2); expected: =
-1:7: unexpected token: ';' (x_2); expected: int
+1:2: unexpected token: ';': expected: =
+1:7: unexpected token: ';': expected: int
statements
├─ statement
-│ ├─ !error
-│ └─ x_2 ";"
+│ ├─ error
+│ └─ <anonymous> ";"
├─ statement
-│ ├─ !error
-│ └─ x_2 ";"
+│ ├─ error
+│ └─ <anonymous> ";"
└─ statement
├─ name "x"
└─ int "1"
diff --git a/cmd/vartan/parse.go b/cmd/vartan/parse.go
index d1fc80e..3f895c9 100644
--- a/cmd/vartan/parse.go
+++ b/cmd/vartan/parse.go
@@ -112,14 +112,7 @@ func runParse(cmd *cobra.Command, args []string) error {
if !*parseFlags.onlyParse {
// A parser can construct a parse tree even if syntax errors occur.
// When there is a parse tree, print it.
-
- var tree *driver.Node
- if *parseFlags.cst {
- tree = tb.Tree()
- } else {
- tree = tb.Tree()
- }
- if tree != nil {
+ if tree := tb.Tree(); tree != nil {
switch *parseFlags.format {
case "tree":
b := tester.ConvertSyntaxTreeToTestableTree(tree).Format()
@@ -179,10 +172,14 @@ func writeSyntaxErrorMessage(b *strings.Builder, cgram *spec.CompiledGrammar, sy
case tok.Invalid():
fmt.Fprintf(b, "'%v' (<invalid>)", string(tok.Lexeme()))
default:
- fmt.Fprintf(b, "'%v' (%v)", string(tok.Lexeme()), cgram.ParsingTable.Terminals[tok.TerminalID()])
+ if kind := cgram.ParsingTable.Terminals[tok.TerminalID()]; kind != "" {
+ fmt.Fprintf(b, "'%v' (%v)", string(tok.Lexeme()), kind)
+ } else {
+ fmt.Fprintf(b, "'%v'", string(tok.Lexeme()))
+ }
}
- fmt.Fprintf(b, "; expected: %v", synErr.ExpectedTerminals[0])
+ fmt.Fprintf(b, ": expected: %v", synErr.ExpectedTerminals[0])
for _, t := range synErr.ExpectedTerminals[1:] {
fmt.Fprintf(b, ", %v", t)
}
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 5c7addd..9e232f7 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -18,6 +18,10 @@ func termNode(kind string, text string, children ...*Node) *Node {
}
}
+func anonTermNode(text string, children ...*Node) *Node {
+ return termNode("", text, children...)
+}
+
func errorNode() *Node {
return &Node{
Type: NodeTypeError,
@@ -65,7 +69,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
nonTermNode("term",
nonTermNode("term",
nonTermNode("factor",
- termNode("x_3", "("),
+ anonTermNode("("),
nonTermNode("expr",
nonTermNode("expr",
nonTermNode("term",
@@ -74,10 +78,10 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
),
),
),
- termNode("x_1", "+"),
+ anonTermNode("+"),
nonTermNode("term",
nonTermNode("factor",
- termNode("x_3", "("),
+ anonTermNode("("),
nonTermNode("expr",
nonTermNode("expr",
nonTermNode("term",
@@ -86,27 +90,27 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
),
),
),
- termNode("x_1", "+"),
+ anonTermNode("+"),
nonTermNode("term",
nonTermNode("factor",
termNode("id", "c"),
),
),
),
- termNode("x_4", ")"),
+ anonTermNode(")"),
),
),
),
- termNode("x_4", ")"),
+ anonTermNode(")"),
),
),
- termNode("x_2", "*"),
+ anonTermNode("*"),
nonTermNode("factor",
termNode("id", "d"),
),
),
),
- termNode("x_1", "+"),
+ anonTermNode("+"),
nonTermNode("term",
nonTermNode("factor",
termNode("id", "e"),
diff --git a/driver/semantic_action.go b/driver/semantic_action.go
index 73f3bb0..54d3291 100644
--- a/driver/semantic_action.go
+++ b/driver/semantic_action.go
@@ -4,6 +4,7 @@ import (
"encoding/json"
"fmt"
"io"
+ "strconv"
)
// SemanticActionSet is a set of semantic actions a parser calls.
@@ -270,6 +271,19 @@ func (n *Node) MarshalJSON() ([]byte, error) {
KindName: n.KindName,
})
case NodeTypeTerminal:
+ if n.KindName == "" {
+ return json.Marshal(struct {
+ Type NodeType `json:"type"`
+ Text string `json:"text"`
+ Row int `json:"row"`
+ Col int `json:"col"`
+ }{
+ Type: n.Type,
+ Text: n.Text,
+ Row: n.Row,
+ Col: n.Col,
+ })
+ }
return json.Marshal(struct {
Type NodeType `json:"type"`
KindName string `json:"kind_name"`
@@ -324,9 +338,13 @@ func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix s
switch node.Type {
case NodeTypeError:
- fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName)
+ fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
case NodeTypeTerminal:
- fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text)
+ if node.KindName == "" {
+ fmt.Fprintf(w, "%v<anonymous> %v\n", ruledLine, strconv.Quote(node.Text))
+ } else {
+ fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text))
+ }
case NodeTypeNonTerminal:
fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go
index 1480390..f68f595 100644
--- a/driver/syntax_error_test.go
+++ b/driver/syntax_error_test.go
@@ -2,6 +2,7 @@ package driver
import (
"fmt"
+ "sort"
"strings"
"testing"
@@ -153,3 +154,172 @@ c
})
}
}
+
+func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ cause string
+ expected []string
+ }{
+ {
+ caption: "the parser reports an expected lookahead symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ cause: `bar`,
+ expected: []string{
+ "foo",
+ },
+ },
+ {
+ caption: "the parser reports expected lookahead symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ src: `baz`,
+ cause: `baz`,
+ expected: []string{
+ "foo",
+ "bar",
+ },
+ },
+ {
+ caption: "the parser may report the EOF as an expected lookahead symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `foobar`,
+ cause: `bar`,
+ expected: []string{
+ "<eof>",
+ },
+ },
+ {
+ caption: "the parser may report the EOF and others as expected lookahead symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ |
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ cause: `bar`,
+ expected: []string{
+ "foo",
+ "<eof>",
+ },
+ },
+ {
+ caption: "when an anonymous symbol is expected, an expected symbol list contains an alias of the anonymous symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo 'bar'
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `foobaz`,
+ cause: `baz`,
+ expected: []string{
+ "bar",
+ },
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := spec.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ g, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram, _, err := grammar.Compile(g)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ synErrs := p.SyntaxErrors()
+ if synErrs == nil {
+ t.Fatalf("expected one syntax error, but it didn't occur")
+ }
+ if len(synErrs) != 1 {
+ t.Fatalf("too many syntax errors: %v errors", len(synErrs))
+ }
+ synErr := synErrs[0]
+ if string(synErr.Token.Lexeme()) != tt.cause {
+ t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme()))
+ }
+ if len(synErr.ExpectedTerminals) != len(tt.expected) {
+ t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals)
+ }
+ sort.Slice(tt.expected, func(i, j int) bool {
+ return tt.expected[i] < tt.expected[j]
+ })
+ sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool {
+ return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j]
+ })
+ for i, e := range tt.expected {
+ if synErr.ExpectedTerminals[i] != e {
+ t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i])
+ }
+ }
+ })
+ }
+}
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 65d232d..80846d9 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -1317,12 +1317,10 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
}
kind2Term := make([]int, len(lexSpec.KindNames))
- term2Kind := make([]int, gram.symbolTable.termNum.Int())
skip := make([]int, len(lexSpec.KindNames))
for i, k := range lexSpec.KindNames {
if k == mlspec.LexKindNameNil {
kind2Term[mlspec.LexKindIDNil] = symbolNil.num().Int()
- term2Kind[symbolNil.num()] = mlspec.LexKindIDNil.Int()
continue
}
@@ -1331,7 +1329,6 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k)
}
kind2Term[i] = sym.num().Int()
- term2Kind[sym.num()] = i
for _, sk := range gram.skipLexKinds {
if k != sk {
@@ -1342,10 +1339,16 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
}
}
- terms, err := gram.symbolTable.terminalTexts()
+ termTexts, err := gram.symbolTable.terminalTexts()
if err != nil {
return nil, nil, err
}
+ terms := make([]string, len(termTexts))
+ for i, t := range termTexts {
+ if !strings.HasPrefix(t, "x_") {
+ terms[i] = t
+ }
+ }
kindAliases := make([]string, gram.symbolTable.termNum.Int())
for _, sym := range gram.symbolTable.terminalSymbols() {
@@ -1440,7 +1443,6 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
Maleeni: &spec.Maleeni{
Spec: lexSpec,
KindToTerminal: kind2Term,
- TerminalToKind: term2Kind,
Skip: skip,
KindAliases: kindAliases,
},
diff --git a/spec/grammar/grammar.go b/spec/grammar/grammar.go
index 25266e5..af87504 100644
--- a/spec/grammar/grammar.go
+++ b/spec/grammar/grammar.go
@@ -17,7 +17,6 @@ type LexicalSpecification struct {
type Maleeni struct {
Spec *mlspec.CompiledLexSpec `json:"spec"`
KindToTerminal []int `json:"kind_to_terminal"`
- TerminalToKind []int `json:"terminal_to_kind"`
Skip []int `json:"skip"`
KindAliases []string `json:"kind_aliases"`
}
diff --git a/spec/test/parser.go b/spec/test/parser.go
index 175c89e..483e553 100644
--- a/spec/test/parser.go
+++ b/spec/test/parser.go
@@ -68,7 +68,11 @@ func (t *Tree) format(buf *bytes.Buffer, depth int) {
buf.WriteString(" ")
}
buf.WriteString("(")
- buf.WriteString(t.Kind)
+ if t.Kind == "" {
+ buf.WriteString("<anonymous>")
+ } else {
+ buf.WriteString(t.Kind)
+ }
if len(t.Children) > 0 {
buf.WriteString("\n")
for i, c := range t.Children {
@@ -228,9 +232,17 @@ func formatSyntaxError(synErr *SyntaxError, gram Grammar) []byte {
case tok.Invalid():
b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())))
default:
- b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID())))
+ if term := gram.Terminal(tok.TerminalID()); term != "" {
+ if alias := gram.TerminalAlias(tok.TerminalID()); alias != "" {
+ b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), alias))
+ } else {
+ b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term))
+ }
+ } else {
+ b.WriteString(fmt.Sprintf("'%v'", string(tok.Lexeme())))
+ }
}
- b.WriteString(fmt.Sprintf("; expected: %v", synErr.ExpectedTerminals[0]))
+ b.WriteString(fmt.Sprintf(": expected: %v", synErr.ExpectedTerminals[0]))
for _, t := range synErr.ExpectedTerminals[1:] {
b.WriteString(fmt.Sprintf(", %v", t))
}