aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md31
-rw-r--r--cmd/vartan/test.go65
-rw-r--r--spec/test/parser.go226
-rw-r--r--spec/test/parser_test.go300
-rw-r--r--spec/test/tree-report.json1
-rw-r--r--spec/test/tree.json1
-rw-r--r--spec/test/tree.vartan25
-rw-r--r--spec/test/tree_lexer.go549
-rw-r--r--spec/test/tree_parser.go638
-rw-r--r--spec/test/tree_semantic_action.go353
-rw-r--r--tester/tester.go177
-rw-r--r--tester/tester_test.go170
12 files changed, 2535 insertions, 1 deletions
diff --git a/README.md b/README.md
index 900fa28..3d9e5f5 100644
--- a/README.md
+++ b/README.md
@@ -121,7 +121,36 @@ When `vartan parse` command successfully parses the input data, it prints a CST
$ vartan show expr-report.json
```
-### 4. Generate a parser
+### 4. Test
+
+`vartan-test` command allows you to test whether your grammar recognizes an input text as a syntax tree with an expected structure. To do so, you need to define a test case as follows.
+
+```
+This is an example.
+---
+a / b * 100
+---
+(expr
+ (expr
+ (expr (id))
+ (div)
+ (expr (id)))
+ (mul)
+ (expr (int)))
+```
+
+The test case consists of a description, an input text, and a syntax tree you expect. Each part is separated by the delimiter `---`. The syntax tree is represented by a syntax similar to an [S-expression](https://en.wikipedia.org/wiki/S-expression).
+
+Save the above test case to `test.txt` file and run the following command.
+
+```sh
+$ vartan-test expr.vartan test.txt
+Passed test.txt
+```
+
+When you specify a directory as the 2nd argument of `vartan-test` command, it will run all test cases in the directory.
+
+### 5. Generate a parser
Using `vartan-go` command, you can generate a source code of a parser to recognize your grammar.
diff --git a/cmd/vartan/test.go b/cmd/vartan/test.go
new file mode 100644
index 0000000..50ba8ca
--- /dev/null
+++ b/cmd/vartan/test.go
@@ -0,0 +1,65 @@
+package main
+
+import (
+ "errors"
+ "fmt"
+ "os"
+
+ "github.com/nihei9/vartan/grammar"
+ "github.com/nihei9/vartan/tester"
+ "github.com/spf13/cobra"
+)
+
+func init() {
+ cmd := &cobra.Command{
+ Use: "test <grammar file path> <test file path>|<test directory path>",
+ Short: "Test a grammar",
+ Example: ` vartan test grammar.vartan test`,
+ Args: cobra.ExactArgs(2),
+ RunE: runTest,
+ }
+ rootCmd.AddCommand(cmd)
+}
+
+func runTest(cmd *cobra.Command, args []string) error {
+ g, err := readGrammar(args[0])
+ if err != nil {
+ return fmt.Errorf("Cannot read a grammar: %w", err)
+ }
+ cg, _, err := grammar.Compile(g)
+ if err != nil {
+ return fmt.Errorf("Cannot read a compiled grammar: %w", err)
+ }
+
+ var cs []*tester.TestCaseWithMetadata
+ {
+ cs = tester.ListTestCases(args[1])
+ errOccurred := false
+ for _, c := range cs {
+ if c.Error != nil {
+ fmt.Fprintf(os.Stderr, "Failed to read a test case or a directory: %v\n%v\n", c.FilePath, c.Error)
+ errOccurred = true
+ }
+ }
+ if errOccurred {
+ return errors.New("Cannot run test")
+ }
+ }
+
+ t := &tester.Tester{
+ Grammar: cg,
+ Cases: cs,
+ }
+ rs := t.Run()
+ testFailed := false
+ for _, r := range rs {
+ fmt.Fprintln(os.Stdout, r)
+ if r.Error != nil {
+ testFailed = true
+ }
+ }
+ if testFailed {
+ return errors.New("Test failed")
+ }
+ return nil
+}
diff --git a/spec/test/parser.go b/spec/test/parser.go
new file mode 100644
index 0000000..0513ee3
--- /dev/null
+++ b/spec/test/parser.go
@@ -0,0 +1,226 @@
+//go:generate vartan compile tree.vartan -o tree.json
+//go:generate vartan-go tree.json --package test
+
+package test
+
+import (
+ "bufio"
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "regexp"
+ "strings"
+)
+
+type TreeDiff struct {
+ ExpectedPath string
+ ActualPath string
+ Message string
+}
+
+func newTreeDiff(expected, actual *Tree, message string) *TreeDiff {
+ return &TreeDiff{
+ ExpectedPath: expected.path(),
+ ActualPath: actual.path(),
+ Message: message,
+ }
+}
+
+type Tree struct {
+ Parent *Tree
+ Offset int
+ Kind string
+ Children []*Tree
+}
+
+func NewTree(kind string, children ...*Tree) *Tree {
+ return &Tree{
+ Kind: kind,
+ Children: children,
+ }
+}
+
+func (t *Tree) Fill() *Tree {
+ for i, c := range t.Children {
+ c.Parent = t
+ c.Offset = i
+ c.Fill()
+ }
+ return t
+}
+
+func (t *Tree) path() string {
+ if t.Parent == nil {
+ return t.Kind
+ }
+ return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind)
+}
+
+func DiffTree(expected, actual *Tree) []*TreeDiff {
+ if expected == nil && actual == nil {
+ return nil
+ }
+ if actual.Kind != expected.Kind {
+ msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind)
+ return []*TreeDiff{
+ newTreeDiff(expected, actual, msg),
+ }
+ }
+ if len(actual.Children) != len(expected.Children) {
+ msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children))
+ return []*TreeDiff{
+ newTreeDiff(expected, actual, msg),
+ }
+ }
+ var diffs []*TreeDiff
+ for i, exp := range expected.Children {
+ if ds := DiffTree(actual.Children[i], exp); len(ds) > 0 {
+ diffs = append(diffs, ds...)
+ }
+ }
+ return diffs
+}
+
+type TestCase struct {
+ Description string
+ Source []byte
+ Output *Tree
+}
+
+func ParseTestCase(r io.Reader) (*TestCase, error) {
+ bufs, err := splitIntoParts(r)
+ if err != nil {
+ return nil, err
+ }
+ if len(bufs) != 3 {
+ return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(bufs))
+ }
+
+ tree, err := parseTree(bytes.NewReader(bufs[2]))
+ if err != nil {
+ return nil, err
+ }
+
+ return &TestCase{
+ Description: string(bufs[0]),
+ Source: bufs[1],
+ Output: tree,
+ }, nil
+}
+
+func splitIntoParts(r io.Reader) ([][]byte, error) {
+ var bufs [][]byte
+ s := bufio.NewScanner(r)
+ for {
+ buf, err := readPart(s)
+ if err != nil {
+ return nil, err
+ }
+ if buf == nil {
+ break
+ }
+ bufs = append(bufs, buf)
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return bufs, nil
+}
+
+var reDelim = regexp.MustCompile(`^\s*---+\s*$`)
+
+func readPart(s *bufio.Scanner) ([]byte, error) {
+ if !s.Scan() {
+ return nil, s.Err()
+ }
+ buf := &bytes.Buffer{}
+ line := s.Bytes()
+ if reDelim.Match(line) {
+ // Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data.
+ return []byte{}, nil
+ }
+ _, err := buf.Write(line)
+ if err != nil {
+ return nil, err
+ }
+ for s.Scan() {
+ line := s.Bytes()
+ if reDelim.Match(line) {
+ return buf.Bytes(), nil
+ }
+ _, err := buf.Write([]byte("\n"))
+ if err != nil {
+ return nil, err
+ }
+ _, err = buf.Write(line)
+ if err != nil {
+ return nil, err
+ }
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return buf.Bytes(), nil
+}
+
+func parseTree(src io.Reader) (*Tree, error) {
+ toks, err := NewTokenStream(src)
+ if err != nil {
+ return nil, err
+ }
+ gram := NewGrammar()
+ tb := NewDefaultSyntaxTreeBuilder()
+ p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb)))
+ if err != nil {
+ return nil, err
+ }
+ err = p.Parse()
+ if err != nil {
+ return nil, err
+ }
+ synErrs := p.SyntaxErrors()
+ if len(synErrs) > 0 {
+ var b strings.Builder
+ b.WriteString("syntax error:")
+ for _, synErr := range synErrs {
+ b.WriteRune('\n')
+ b.Write(formatSyntaxError(synErr, gram))
+ }
+ return nil, errors.New(b.String())
+ }
+ return genTree(tb.Tree()).Fill(), nil
+}
+
+func formatSyntaxError(synErr *SyntaxError, gram Grammar) []byte {
+ var b bytes.Buffer
+
+ b.WriteString(fmt.Sprintf("%v:%v: %v: ", synErr.Row+1, synErr.Col+1, synErr.Message))
+
+ tok := synErr.Token
+ switch {
+ case tok.EOF():
+ b.WriteString("<eof>")
+ case tok.Invalid():
+ b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())))
+ default:
+ b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), gram.Terminal(tok.TerminalID())))
+ }
+ b.WriteString(fmt.Sprintf("; expected: %v", synErr.ExpectedTerminals[0]))
+ for _, t := range synErr.ExpectedTerminals[1:] {
+ b.WriteString(fmt.Sprintf(", %v", t))
+ }
+
+ return b.Bytes()
+}
+
+func genTree(node *Node) *Tree {
+ var children []*Tree
+ if len(node.Children) > 1 {
+ children = make([]*Tree, len(node.Children)-1)
+ for i, c := range node.Children[1:] {
+ children[i] = genTree(c)
+ }
+ }
+ return NewTree(node.Children[0].Text, children...)
+}
diff --git a/spec/test/parser_test.go b/spec/test/parser_test.go
new file mode 100644
index 0000000..6e77f6d
--- /dev/null
+++ b/spec/test/parser_test.go
@@ -0,0 +1,300 @@
+package test
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestDiffTree(t *testing.T) {
+ tests := []struct {
+ t1 *Tree
+ t2 *Tree
+ different bool
+ }{
+ {
+ t1: NewTree("a"),
+ t2: NewTree("a"),
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b"),
+ ),
+ t2: NewTree("a",
+ NewTree("b"),
+ ),
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b"),
+ NewTree("c"),
+ NewTree("d"),
+ ),
+ t2: NewTree("a",
+ NewTree("b"),
+ NewTree("c"),
+ NewTree("d"),
+ ),
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b",
+ NewTree("c"),
+ ),
+ NewTree("d",
+ NewTree("d"),
+ ),
+ ),
+ t2: NewTree("a",
+ NewTree("b",
+ NewTree("c"),
+ ),
+ NewTree("d",
+ NewTree("d"),
+ ),
+ ),
+ },
+ {
+ t1: NewTree("a"),
+ t2: NewTree("b"),
+ different: true,
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b"),
+ ),
+ t2: NewTree("a"),
+ different: true,
+ },
+ {
+ t1: NewTree("a"),
+ t2: NewTree("a",
+ NewTree("b"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b"),
+ ),
+ t2: NewTree("a",
+ NewTree("c"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b"),
+ NewTree("c"),
+ NewTree("d"),
+ ),
+ t2: NewTree("a",
+ NewTree("b"),
+ NewTree("c"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b"),
+ NewTree("c"),
+ ),
+ t2: NewTree("a",
+ NewTree("b"),
+ NewTree("c"),
+ NewTree("d"),
+ ),
+ different: true,
+ },
+ {
+ t1: NewTree("a",
+ NewTree("b",
+ NewTree("c"),
+ ),
+ ),
+ t2: NewTree("a",
+ NewTree("b",
+ NewTree("d"),
+ ),
+ ),
+ different: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ diffs := DiffTree(tt.t1, tt.t2)
+ if tt.different && len(diffs) == 0 {
+ t.Fatalf("unexpected result")
+ } else if !tt.different && len(diffs) > 0 {
+ t.Fatalf("unexpected result")
+ }
+ })
+ }
+}
+
+func TestParseTestCase(t *testing.T) {
+ tests := []struct {
+ src string
+ tc *TestCase
+ parseErr bool
+ }{
+ {
+ src: `test
+---
+foo
+---
+(foo)
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo"),
+ Output: NewTree("foo").Fill(),
+ },
+ },
+ {
+ src: `
+test
+
+---
+
+foo
+
+---
+
+(foo)
+
+`,
+ tc: &TestCase{
+ Description: "\ntest\n",
+ Source: []byte("\nfoo\n"),
+ Output: NewTree("foo").Fill(),
+ },
+ },
+ // The length of a part delimiter may be greater than 3.
+ {
+ src: `
+test
+----
+foo
+----
+(foo)
+`,
+ tc: &TestCase{
+ Description: "\ntest",
+ Source: []byte("foo"),
+ Output: NewTree("foo").Fill(),
+ },
+ },
+ // The description part may be empty.
+ {
+ src: `----
+foo
+----
+(foo)
+`,
+ tc: &TestCase{
+ Description: "",
+ Source: []byte("foo"),
+ Output: NewTree("foo").Fill(),
+ },
+ },
+ // The source part may be empty.
+ {
+ src: `test
+---
+---
+(foo)
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte{},
+ Output: NewTree("foo").Fill(),
+ },
+ },
+ // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error,
+ // but we allow it to simplify the implementation of the parser.
+ {
+ src: `test
+----
+foo
+----
+(foo)
+---
+`,
+ tc: &TestCase{
+ Description: "test",
+ Source: []byte("foo"),
+ Output: NewTree("foo").Fill(),
+ },
+ },
+ {
+ src: ``,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+foo
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+foo
+---
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+--
+foo
+--
+(foo)
+`,
+ parseErr: true,
+ },
+ {
+ src: `test
+---
+foo
+---
+?
+`,
+ parseErr: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ tc, err := ParseTestCase(strings.NewReader(tt.src))
+ if tt.parseErr {
+ if err == nil {
+ t.Fatalf("an expected error didn't occur")
+ }
+ } else {
+ if err != nil {
+ t.Fatal(err)
+ }
+ testTestCase(t, tt.tc, tc)
+ }
+ })
+ }
+}
+
+func testTestCase(t *testing.T, expected, actual *TestCase) {
+ t.Helper()
+
+ if expected.Description != actual.Description ||
+ !reflect.DeepEqual(expected.Source, actual.Source) ||
+ len(DiffTree(expected.Output, actual.Output)) > 0 {
+ t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual)
+ }
+}
diff --git a/spec/test/tree-report.json b/spec/test/tree-report.json
new file mode 100644
index 0000000..71cdf0e
--- /dev/null
+++ b/spec/test/tree-report.json
@@ -0,0 +1 @@
+{"class":"LALR(1)","terminals":[null,{"number":1,"name":"\u003ceof\u003e","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""},{"number":2,"name":"error","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""},{"number":3,"name":"ws","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""},{"number":4,"name":"l_paren","anonymous":false,"alias":"(","pattern":"","prec":1,"assoc":""},{"number":5,"name":"r_paren","anonymous":false,"alias":")","pattern":"","prec":0,"assoc":""},{"number":6,"name":"identifier","anonymous":false,"alias":"","pattern":"","prec":0,"assoc":""}],"non_terminals":[null,{"number":1,"name":"tree'"},{"number":2,"name":"tree"},{"number":3,"name":"tree_list"}],"productions":[null,{"number":1,"lhs":1,"rhs":[-2],"prec":0,"assoc":""},{"number":2,"lhs":2,"rhs":[4,6,-3,5],"prec":0,"assoc":""},{"number":3,"lhs":2,"rhs":[4,2,5],"prec":0,"assoc":""},{"number":4,"lhs":3,"rhs":[-3,-2],"prec":0,"assoc":""},{"number":5,"lhs":3,"rhs":[-2],"prec":0,"assoc":""},{"number":6,"lhs":3,"rhs":[],"prec":2,"assoc":""}],"states":[{"number":0,"kernel":[{"production":1,"dot":0}],"shift":[{"symbol":4,"state":2}],"reduce":null,"goto":[{"symbol":2,"state":1}],"sr_conflict":[],"rr_conflict":[]},{"number":1,"kernel":[{"production":1,"dot":1}],"shift":null,"reduce":[{"look_ahead":[1],"production":1}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":2,"kernel":[{"production":2,"dot":1},{"production":3,"dot":1}],"shift":[{"symbol":2,"state":3},{"symbol":6,"state":4}],"reduce":null,"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":3,"kernel":[{"production":3,"dot":2}],"shift":[{"symbol":5,"state":5}],"reduce":null,"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":4,"kernel":[{"production":2,"dot":2}],"shift":[{"symbol":4,"state":2}],"reduce":[{"look_ahead":[5,1],"production":6}],"goto":[{"symbol":2,"state":6},{"symbol":3,"state":7}],"sr_conflict":[{"symbol":4,"state":2,"production":6,"adopted_state":2,"adopted_production":null,"resolved_by":
1}],"rr_conflict":[]},{"number":5,"kernel":[{"production":3,"dot":3}],"shift":null,"reduce":[{"look_ahead":[4,5,1],"production":3}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":6,"kernel":[{"production":5,"dot":1}],"shift":null,"reduce":[{"look_ahead":[4,5],"production":5}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":7,"kernel":[{"production":2,"dot":3},{"production":4,"dot":1}],"shift":[{"symbol":4,"state":2},{"symbol":5,"state":9}],"reduce":null,"goto":[{"symbol":2,"state":8}],"sr_conflict":[],"rr_conflict":[]},{"number":8,"kernel":[{"production":4,"dot":2}],"shift":null,"reduce":[{"look_ahead":[4,5],"production":4}],"goto":null,"sr_conflict":[],"rr_conflict":[]},{"number":9,"kernel":[{"production":2,"dot":4}],"shift":null,"reduce":[{"look_ahead":[4,5,1],"production":2}],"goto":null,"sr_conflict":[],"rr_conflict":[]}]} \ No newline at end of file
diff --git a/spec/test/tree.json b/spec/test/tree.json
new file mode 100644
index 0000000..8387bec
--- /dev/null
+++ b/spec/test/tree.json
@@ -0,0 +1 @@
+{"name":"tree","lexical_specification":{"lexer":"maleeni","maleeni":{"spec":{"name":"tree","initial_mode_id":1,"mode_names":["","default"],"kind_names":["","ws","l_paren","r_paren","identifier"],"kind_ids":[null,[0,1,2,3,4]],"compression_level":2,"specs":[null,{"kind_names":["","ws","l_paren","r_paren","identifier"],"push":[0,0,0,0,0],"pop":[0,0,0,0,0],"dfa":{"initial_state_id":1,"accepting_states":[0,0,1,4,2,3],"row_count":6,"col_count":256,"transition":{"unique_entries":{"original_row_count":4,"original_col_count":256,"empty_value":0,"entries":[0,0,0,0,0,0,0,0,0,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,4,5,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0,0,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3,0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"bounds":[-1,-1,-1,-1,-1,-1,-1,-1,-1,1,1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1,-1,-1,-1,-1,-1,-1,-1,1,1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,1,1,-1,-1,-1,-1,-1,-1,-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,-1,-1,-1,-1,1,-1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,3,3,3,3,3,3,3,3,3,3,-1,-1,-1,-1,-1,-1,-1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,-1,-1,-1,-1,3,-1,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,2,2,-1,-1,2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,2,-1,-1,-1,-1,-1,
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1],"row_displacement":[0,0,189,75]},"row_nums":[0,1,2,3,0,0],"original_row_count":6,"original_col_count":256,"empty_value":0}}}]},"kind_to_terminal":[0,3,4,5,6],"terminal_to_kind":[0,0,0,1,2,3,4],"skip":[0,1,0,0,0],"kind_aliases":["","","","","(",")",""]}},"parsing_table":{"class":"lalr","action":[0,0,0,0,-2,0,0,0,1,0,0,0,0,0,0,0,-3,0,0,0,-4,0,0,0,0,0,-5,0,0,6,0,0,-2,6,0,0,3,0,0,3,3,0,0,0,0,0,5,5,0,0,0,0,0,-2,-9,0,0,0,0,0,4,4,0,0,2,0,0,2,2,0],"goto":[0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,7,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0],"state_count":10,"initial_state":0,"start_production":1,"lhs_symbols":[0,1,2,2,3,3,3],"alternative_symbol_counts":[0,1,4,3,2,1,0],"terminals":["","\u003ceof\u003e","error","ws","l_paren","r_paren","identifier"],"terminal_count":7,"non_terminals":["","tree'","tree","tree_list"],"non_terminal_count":4,"eof_symbol":1,"error_symbol":2,"error_trapper_states":[0,0,1,0,0,0,0,0,0,0],"recover_productions":[0,0,0,1,0,0,0]},"ast_action":{"entries":[null,null,[2,-3],[2],[-1,2],null,null]}}
diff --git a/spec/test/tree.vartan b/spec/test/tree.vartan
new file mode 100644
index 0000000..139014d
--- /dev/null
+++ b/spec/test/tree.vartan
@@ -0,0 +1,25 @@
+#name tree;
+
+#prec (
+ #assign l_paren
+ #assign $empty_tree
+);
+
+tree
+ : l_paren identifier tree_list r_paren #ast identifier tree_list...
+ | l_paren error r_paren #recover #ast error
+ ;
+tree_list
+ : tree_list tree #ast tree_list... tree
+ | tree
+ | #prec $empty_tree
+ ;
+
+ws #skip
+ : "[\u{0009}\u{000A}\u{000D}\u{0020}]+";
+l_paren
+ : '(';
+r_paren
+ : ')';
+identifier
+ : "[0-9A-Za-z_]+";
diff --git a/spec/test/tree_lexer.go b/spec/test/tree_lexer.go
new file mode 100644
index 0000000..931626f
--- /dev/null
+++ b/spec/test/tree_lexer.go
@@ -0,0 +1,549 @@
+// Code generated by maleeni-go. DO NOT EDIT.
+package test
+
+import (
+ "fmt"
+ "io"
+ "io/ioutil"
+)
+
+type ModeID int
+
+func (id ModeID) Int() int {
+ return int(id)
+}
+
+type StateID int
+
+func (id StateID) Int() int {
+ return int(id)
+}
+
+type KindID int
+
+func (id KindID) Int() int {
+ return int(id)
+}
+
+type ModeKindID int
+
+func (id ModeKindID) Int() int {
+ return int(id)
+}
+
+type LexSpec interface {
+ InitialMode() ModeID
+ Pop(mode ModeID, modeKind ModeKindID) bool
+ Push(mode ModeID, modeKind ModeKindID) (ModeID, bool)
+ ModeName(mode ModeID) string
+ InitialState(mode ModeID) StateID
+ NextState(mode ModeID, state StateID, v int) (StateID, bool)
+ Accept(mode ModeID, state StateID) (ModeKindID, bool)
+ KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string)
+}
+
+// Token represents a token.
+type Token struct {
+ // ModeID is an ID of a lex mode.
+ ModeID ModeID
+
+ // KindID is an ID of a kind. This is unique among all modes.
+ KindID KindID
+
+ // ModeKindID is an ID of a lexical kind. This is unique only within a mode.
+ // Note that you need to use KindID field if you want to identify a kind across all modes.
+ ModeKindID ModeKindID
+
+ // Row is a row number where a lexeme appears.
+ Row int
+
+ // Col is a column number where a lexeme appears.
+ // Note that Col is counted in code points, not bytes.
+ Col int
+
+ // Lexeme is a byte sequence matched a pattern of a lexical specification.
+ Lexeme []byte
+
+ // When this field is true, it means the token is the EOF token.
+ EOF bool
+
+ // When this field is true, it means the token is an error token.
+ Invalid bool
+}
+
+type LexerOption func(l *Lexer) error
+
+// DisableModeTransition disables the active mode transition. Thus, even if the lexical specification has the push and pop
+// operations, the lexer doesn't perform these operations. When the lexical specification has multiple modes, and this option is
+// enabled, you need to call the Lexer.Push and Lexer.Pop methods to perform the mode transition. You can use the Lexer.Mode method
+// to know the current lex mode.
+func DisableModeTransition() LexerOption {
+ return func(l *Lexer) error {
+ l.passiveModeTran = true
+ return nil
+ }
+}
+
+type Lexer struct {
+ spec LexSpec
+ src []byte
+ srcPtr int
+ row int
+ col int
+ prevRow int
+ prevCol int
+ tokBuf []*Token
+ modeStack []ModeID
+ passiveModeTran bool
+}
+
+// NewLexer returns a new lexer.
+func NewLexer(spec LexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) {
+ b, err := ioutil.ReadAll(src)
+ if err != nil {
+ return nil, err
+ }
+ l := &Lexer{
+ spec: spec,
+ src: b,
+ srcPtr: 0,
+ row: 0,
+ col: 0,
+ modeStack: []ModeID{
+ spec.InitialMode(),
+ },
+ passiveModeTran: false,
+ }
+ for _, opt := range opts {
+ err := opt(l)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return l, nil
+}
+
+// Next returns a next token.
+func (l *Lexer) Next() (*Token, error) {
+ if len(l.tokBuf) > 0 {
+ tok := l.tokBuf[0]
+ l.tokBuf = l.tokBuf[1:]
+ return tok, nil
+ }
+
+ tok, err := l.nextAndTransition()
+ if err != nil {
+ return nil, err
+ }
+ if !tok.Invalid {
+ return tok, nil
+ }
+ errTok := tok
+ for {
+ tok, err = l.nextAndTransition()
+ if err != nil {
+ return nil, err
+ }
+ if !tok.Invalid {
+ break
+ }
+ errTok.Lexeme = append(errTok.Lexeme, tok.Lexeme...)
+ }
+ l.tokBuf = append(l.tokBuf, tok)
+
+ return errTok, nil
+}
+
+func (l *Lexer) nextAndTransition() (*Token, error) {
+ tok, err := l.next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.EOF || tok.Invalid {
+ return tok, nil
+ }
+ if l.passiveModeTran {
+ return tok, nil
+ }
+ mode := l.Mode()
+ if l.spec.Pop(mode, tok.ModeKindID) {
+ err := l.PopMode()
+ if err != nil {
+ return nil, err
+ }
+ }
+ if mode, ok := l.spec.Push(mode, tok.ModeKindID); ok {
+ l.PushMode(mode)
+ }
+	// The length of the mode stack must be checked after the pop and push operations because those operations can be performed
+ // at the same time. When the mode stack has just one element and popped it, the mode stack will be temporarily emptied.
+ // However, since a push operation may be performed immediately after it, the lexer allows the stack to be temporarily empty.
+ if len(l.modeStack) == 0 {
+ return nil, fmt.Errorf("a mode stack must have at least one element")
+ }
+ return tok, nil
+}
+
+func (l *Lexer) next() (*Token, error) {
+ mode := l.Mode()
+ state := l.spec.InitialState(mode)
+ buf := []byte{}
+ unfixedBufLen := 0
+ row := l.row
+ col := l.col
+ var tok *Token
+ for {
+ v, eof := l.read()
+ if eof {
+ if tok != nil {
+ l.unread(unfixedBufLen)
+ return tok, nil
+ }
+ // When `buf` has unaccepted data and reads the EOF, the lexer treats the buffered data as an invalid token.
+ if len(buf) > 0 {
+ return &Token{
+ ModeID: mode,
+ ModeKindID: 0,
+ Lexeme: buf,
+ Row: row,
+ Col: col,
+ Invalid: true,
+ }, nil
+ }
+ return &Token{
+ ModeID: mode,
+ ModeKindID: 0,
+ Row: 0,
+ Col: 0,
+ EOF: true,
+ }, nil
+ }
+ buf = append(buf, v)
+ unfixedBufLen++
+ nextState, ok := l.spec.NextState(mode, state, int(v))
+ if !ok {
+ if tok != nil {
+ l.unread(unfixedBufLen)
+ return tok, nil
+ }
+ return &Token{
+ ModeID: mode,
+ ModeKindID: 0,
+ Lexeme: buf,
+ Row: row,
+ Col: col,
+ Invalid: true,
+ }, nil
+ }
+ state = nextState
+ if modeKindID, ok := l.spec.Accept(mode, state); ok {
+ kindID, _ := l.spec.KindIDAndName(mode, modeKindID)
+ tok = &Token{
+ ModeID: mode,
+ KindID: kindID,
+ ModeKindID: modeKindID,
+ Lexeme: buf,
+ Row: row,
+ Col: col,
+ }
+ unfixedBufLen = 0
+ }
+ }
+}
+
+// Mode returns the current lex mode.
+func (l *Lexer) Mode() ModeID {
+ return l.modeStack[len(l.modeStack)-1]
+}
+
+// PushMode adds a lex mode onto the mode stack.
+func (l *Lexer) PushMode(mode ModeID) {
+ l.modeStack = append(l.modeStack, mode)
+}
+
+// PopMode removes a lex mode from the top of the mode stack.
+func (l *Lexer) PopMode() error {
+ sLen := len(l.modeStack)
+ if sLen == 0 {
+ return fmt.Errorf("cannot pop a lex mode from a lex mode stack any more")
+ }
+ l.modeStack = l.modeStack[:sLen-1]
+ return nil
+}
+
+func (l *Lexer) read() (byte, bool) {
+ if l.srcPtr >= len(l.src) {
+ return 0, true
+ }
+
+ b := l.src[l.srcPtr]
+ l.srcPtr++
+
+ l.prevRow = l.row
+ l.prevCol = l.col
+
+ // Count the token positions.
+ // The driver treats LF as the end of lines and counts columns in code points, not bytes.
+ // To count in code points, we refer to the First Byte column in the Table 3-6.
+ //
+ // Reference:
+ // - [Table 3-6] https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf > Table 3-6. UTF-8 Bit Distribution
+ if b < 128 {
+ // 0x0A is LF.
+ if b == 0x0A {
+ l.row++
+ l.col = 0
+ } else {
+ l.col++
+ }
+ } else if b>>5 == 6 || b>>4 == 14 || b>>3 == 30 {
+ l.col++
+ }
+
+ return b, false
+}
+
+// We must not call this function consecutively to record the token position correctly.
+func (l *Lexer) unread(n int) {
+ l.srcPtr -= n
+
+ l.row = l.prevRow
+ l.col = l.prevCol
+}
+
+const (
+ ModeIDNil ModeID = 0
+ ModeIDDefault ModeID = 1
+)
+
+const (
+ ModeNameNil = ""
+ ModeNameDefault = "default"
+)
+
+// ModeIDToName converts a mode ID to a name.
+func ModeIDToName(id ModeID) string {
+ switch id {
+ case ModeIDNil:
+ return ModeNameNil
+ case ModeIDDefault:
+ return ModeNameDefault
+ }
+ return ""
+}
+
+const (
+ KindIDNil KindID = 0
+ KindIDWs KindID = 1
+ KindIDLParen KindID = 2
+ KindIDRParen KindID = 3
+ KindIDIdentifier KindID = 4
+)
+
+const (
+ KindNameNil = ""
+ KindNameWs = "ws"
+ KindNameLParen = "l_paren"
+ KindNameRParen = "r_paren"
+ KindNameIdentifier = "identifier"
+)
+
+// KindIDToName converts a kind ID to a name.
+func KindIDToName(id KindID) string {
+ switch id {
+ case KindIDNil:
+ return KindNameNil
+ case KindIDWs:
+ return KindNameWs
+ case KindIDLParen:
+ return KindNameLParen
+ case KindIDRParen:
+ return KindNameRParen
+ case KindIDIdentifier:
+ return KindNameIdentifier
+ }
+ return ""
+}
+
+type lexSpec struct {
+ pop [][]bool
+ push [][]ModeID
+ modeNames []string
+ initialStates []StateID
+ acceptances [][]ModeKindID
+ kindIDs [][]KindID
+ kindNames []string
+ initialModeID ModeID
+ modeIDNil ModeID
+ modeKindIDNil ModeKindID
+ stateIDNil StateID
+
+ rowNums [][]int
+ rowDisplacements [][]int
+ bounds [][]int
+ entries [][]StateID
+ originalColCounts []int
+}
+
+func NewLexSpec() *lexSpec {
+ return &lexSpec{
+ pop: [][]bool{
+ nil,
+ {
+ false, false, false, false, false,
+ },
+ },
+ push: [][]ModeID{
+ nil,
+ {
+ 0, 0, 0, 0, 0,
+ },
+ },
+ modeNames: []string{
+ ModeNameNil,
+ ModeNameDefault,
+ },
+ initialStates: []StateID{
+ 0,
+ 1,
+ },
+ acceptances: [][]ModeKindID{
+ nil,
+ {
+ 0, 0, 1, 4, 2, 3,
+ },
+ },
+ kindIDs: [][]KindID{
+ nil,
+ {
+ KindIDNil,
+ KindIDWs,
+ KindIDLParen,
+ KindIDRParen,
+ KindIDIdentifier,
+ },
+ },
+ kindNames: []string{
+ KindNameNil,
+ KindNameWs,
+ KindNameLParen,
+ KindNameRParen,
+ KindNameIdentifier,
+ },
+ initialModeID: ModeIDDefault,
+ modeIDNil: ModeIDNil,
+ modeKindIDNil: 0,
+ stateIDNil: 0,
+
+ rowNums: [][]int{
+ nil,
+ {
+ 0, 1, 2, 3, 0, 0,
+ },
+ },
+ rowDisplacements: [][]int{
+ nil,
+ {
+ 0, 0, 189, 75,
+ },
+ },
+ bounds: [][]int{
+ nil,
+ {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1,
+ 1, 1, -1, -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1,
+ -1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, -1, -1, -1, -1, 3, -1, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2,
+ -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1,
+ },
+ },
+ entries: [][]StateID{
+ nil,
+ {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0,
+ 4, 5, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0,
+ 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 0, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 0, 0, 0, 0, 3, 0, 3, 3, 3, 3, 3, 3, 3, 3,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2,
+ 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0,
+ },
+ },
+ originalColCounts: nil,
+ }
+}
+
+func (s *lexSpec) InitialMode() ModeID {
+ return s.initialModeID
+}
+
+func (s *lexSpec) Pop(mode ModeID, modeKind ModeKindID) bool {
+ return s.pop[mode][modeKind]
+}
+
+func (s *lexSpec) Push(mode ModeID, modeKind ModeKindID) (ModeID, bool) {
+ id := s.push[mode][modeKind]
+ return id, id != s.modeIDNil
+}
+
+func (s *lexSpec) ModeName(mode ModeID) string {
+ return s.modeNames[mode]
+}
+
+func (s *lexSpec) InitialState(mode ModeID) StateID {
+ return s.initialStates[mode]
+}
+
+func (s *lexSpec) NextState(mode ModeID, state StateID, v int) (StateID, bool) {
+ rowNum := s.rowNums[mode][state]
+ d := s.rowDisplacements[mode][rowNum]
+ if s.bounds[mode][d+v] != rowNum {
+ return s.stateIDNil, false
+ }
+ return s.entries[mode][d+v], true
+}
+
+func (s *lexSpec) Accept(mode ModeID, state StateID) (ModeKindID, bool) {
+ id := s.acceptances[mode][state]
+ return id, id != s.modeKindIDNil
+}
+
+func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string) {
+ id := s.kindIDs[mode][modeKind]
+ return id, s.kindNames[id]
+}
diff --git a/spec/test/tree_parser.go b/spec/test/tree_parser.go
new file mode 100644
index 0000000..567e3b0
--- /dev/null
+++ b/spec/test/tree_parser.go
@@ -0,0 +1,638 @@
+// Code generated by vartan-go. DO NOT EDIT.
+package test
+
+import (
+ "fmt"
+ "io"
+)
+
+// Grammar exposes the LR parsing tables and grammar metadata the parser
+// drives.
+type Grammar interface {
+	// Class returns a class of grammar.
+	Class() string
+
+	// InitialState returns the initial state of a parser.
+	InitialState() int
+
+	// StartProduction returns the start production of grammar.
+	StartProduction() int
+
+	// Action returns an ACTION entry corresponding to a (state, terminal symbol) pair.
+	Action(state int, terminal int) int
+
+	// GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair.
+	GoTo(state int, lhs int) int
+
+	// ErrorTrapperState returns true when a state can shift the error symbol.
+	ErrorTrapperState(state int) bool
+
+	// LHS returns the LHS symbol of a production.
+	LHS(prod int) int
+
+	// AlternativeSymbolCount returns the symbol count of production prod.
+	AlternativeSymbolCount(prod int) int
+
+	// RecoverProduction returns true when a production has the recover directive.
+	RecoverProduction(prod int) bool
+
+	// NonTerminal returns a string representation of a non-terminal symbol.
+	NonTerminal(nonTerminal int) string
+
+	// TerminalCount returns a terminal symbol count of grammar.
+	TerminalCount() int
+
+	// EOF returns the EOF symbol.
+	EOF() int
+
+	// Error returns the error symbol.
+	Error() int
+
+	// Terminal returns a string representation of a terminal symbol.
+	Terminal(terminal int) string
+
+	// TerminalAlias returns an alias for a terminal.
+	TerminalAlias(terminal int) string
+
+	// ASTAction returns the AST action entries of a production.
+	ASTAction(prod int) []int
+}
+
+// VToken is a token the parser consumes: a lexer token decorated with the
+// terminal ID the parsing tables use.
+type VToken interface {
+	// TerminalID returns a terminal ID.
+	TerminalID() int
+
+	// Lexeme returns a lexeme.
+	Lexeme() []byte
+
+	// EOF returns true when a token represents EOF.
+	EOF() bool
+
+	// Invalid returns true when a token is invalid.
+	Invalid() bool
+
+	// Position returns (row, column) pair.
+	Position() (int, int)
+
+	// Skip returns true when a token must be skipped on syntax analysis.
+	Skip() bool
+}
+
+// TokenStream supplies tokens to the parser one at a time.
+type TokenStream interface {
+	Next() (VToken, error)
+}
+
+// SyntaxError describes one syntax error the parser detected: its position,
+// the offending token, and the terminals that would have been acceptable.
+type SyntaxError struct {
+	Row               int
+	Col               int
+	Message           string
+	Token             VToken
+	ExpectedTerminals []string
+}
+
+// ParserOption configures a Parser created by NewParser.
+type ParserOption func(p *Parser) error
+
+// DisableLAC disables LAC (lookahead correction). When the grammar has the LALR class, LAC is enabled by default.
+func DisableLAC() ParserOption {
+	return func(p *Parser) error {
+		p.disableLAC = true
+		return nil
+	}
+}
+
+// SemanticAction sets the semantic-action set the parser invokes on shift,
+// reduce, accept, and error events.
+func SemanticAction(semAct SemanticActionSet) ParserOption {
+	return func(p *Parser) error {
+		p.semAct = semAct
+		return nil
+	}
+}
+
+// Parser is an LR parser driver. It is not safe for concurrent use.
+type Parser struct {
+	toks       TokenStream
+	gram       Grammar
+	stateStack *stateStack
+	semAct     SemanticActionSet
+	disableLAC bool
+	onError    bool       // true while recovering from a syntax error
+	shiftCount int        // shifts performed since error recovery started
+	synErrs    []*SyntaxError
+}
+
+// NewParser returns a new Parser that reads tokens from toks and drives the
+// tables of gram. Options may further configure the parser.
+func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) {
+	p := &Parser{
+		toks:       toks,
+		gram:       gram,
+		stateStack: &stateStack{},
+	}
+
+	// LAC is only applied to LALR tables; disable it for any other class.
+	if p.gram.Class() != "lalr" {
+		p.disableLAC = true
+	}
+
+	for _, opt := range opts {
+		err := opt(p)
+		if err != nil {
+			return nil, err
+		}
+	}
+
+	return p, nil
+}
+
+// Parse runs the LR driver until the input is accepted or error recovery
+// fails. Recovered syntax errors are recorded and available via
+// SyntaxErrors; in that case Parse still returns nil. A non-nil error is
+// returned only for failures of the token stream or broken tables.
+func (p *Parser) Parse() error {
+	p.stateStack.push(p.gram.InitialState())
+	tok, err := p.nextToken()
+	if err != nil {
+		return err
+	}
+
+ACTION_LOOP:
+	for {
+		act := p.lookupAction(tok)
+
+		switch {
+		case act < 0: // Shift
+			// ACTION entries encode a shift as the negated target state.
+			nextState := act * -1
+
+			recovered := false
+			if p.onError {
+				p.shiftCount++
+
+				// When the parser performs shift three times, the parser recovers from the error state.
+				if p.shiftCount >= 3 {
+					p.onError = false
+					p.shiftCount = 0
+					recovered = true
+				}
+			}
+
+			p.shift(nextState)
+
+			if p.semAct != nil {
+				p.semAct.Shift(tok, recovered)
+			}
+
+			tok, err = p.nextToken()
+			if err != nil {
+				return err
+			}
+		case act > 0: // Reduce
+			prodNum := act
+
+			recovered := false
+			if p.onError && p.gram.RecoverProduction(prodNum) {
+				p.onError = false
+				p.shiftCount = 0
+				recovered = true
+			}
+
+			accepted := p.reduce(prodNum)
+			if accepted {
+				if p.semAct != nil {
+					p.semAct.Accept()
+				}
+
+				return nil
+			}
+
+			if p.semAct != nil {
+				p.semAct.Reduce(prodNum, recovered)
+			}
+		default: // Error
+			if p.onError {
+				// Already recovering: discard tokens until one makes progress.
+				tok, err = p.nextToken()
+				if err != nil {
+					return err
+				}
+				if tok.EOF() {
+					if p.semAct != nil {
+						p.semAct.MissError(tok)
+					}
+
+					return nil
+				}
+
+				continue ACTION_LOOP
+			}
+
+			row, col := tok.Position()
+			p.synErrs = append(p.synErrs, &SyntaxError{
+				Row:               row,
+				Col:               col,
+				Message:           "unexpected token",
+				Token:             tok,
+				ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
+			})
+
+			// Pop states until one that can shift the error symbol appears.
+			count, ok := p.trapError()
+			if !ok {
+				if p.semAct != nil {
+					p.semAct.MissError(tok)
+				}
+
+				return nil
+			}
+
+			p.onError = true
+			p.shiftCount = 0
+
+			act, err := p.lookupActionOnError()
+			if err != nil {
+				return err
+			}
+
+			// The entry is guaranteed to be a shift on the error symbol.
+			p.shift(act * -1)
+
+			if p.semAct != nil {
+				p.semAct.TrapAndShiftError(tok, count)
+			}
+		}
+	}
+}
+
+// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid,
+// this method returns `true`.
+//
+// This implements LAC (lookahead correction): reductions are simulated on an
+// exploratory copy of the state stack until `term` either becomes shiftable
+// (valid) or hits an error entry (invalid). The real stack is untouched.
+func (p *Parser) validateLookahead(term int) bool {
+	p.stateStack.enableExploratoryMode()
+	defer p.stateStack.disableExploratoryMode()
+
+	for {
+		act := p.gram.Action(p.stateStack.topExploratorily(), term)
+
+		switch {
+		case act < 0: // Shift
+			return true
+		case act > 0: // Reduce
+			prodNum := act
+
+			lhs := p.gram.LHS(prodNum)
+			if lhs == p.gram.LHS(p.gram.StartProduction()) {
+				// Reducing by the start production means acceptance.
+				return true
+			}
+			n := p.gram.AlternativeSymbolCount(prodNum)
+			p.stateStack.popExploratorily(n)
+			state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs)
+			p.stateStack.pushExploratorily(state)
+		default: // Error
+			return false
+		}
+	}
+}
+
+// nextToken fetches tokens from the underlying stream and returns the first
+// one that is not marked as skippable.
+//
+// We don't have to check whether the token is invalid because the kind ID of
+// the invalid token is 0, and the parsing table doesn't have an entry
+// corresponding to the kind ID 0. Thus we can detect a syntax error because
+// the parser cannot find an entry corresponding to the invalid token.
+func (p *Parser) nextToken() (VToken, error) {
+	for {
+		tok, err := p.toks.Next()
+		if err != nil {
+			return nil, err
+		}
+		if !tok.Skip() {
+			return tok, nil
+		}
+	}
+}
+
+// tokenToTerminal maps tok to the terminal symbol the grammar tables use.
+func (p *Parser) tokenToTerminal(tok VToken) int {
+	if !tok.EOF() {
+		return tok.TerminalID()
+	}
+	// An EOF token carries no terminal ID of its own; the grammar defines it.
+	return p.gram.EOF()
+}
+
+// lookupAction returns the ACTION table entry for the current state and the
+// terminal of tok. With LAC enabled, an entry is returned only when the
+// lookahead is provably viable; otherwise 0 (the error entry) is returned.
+//
+// Fixed: the terminal conversion was performed twice (once for the LAC check
+// and once for the table lookup); it is now computed a single time.
+func (p *Parser) lookupAction(tok VToken) int {
+	term := p.tokenToTerminal(tok)
+	if !p.disableLAC {
+		if !p.validateLookahead(term) {
+			return 0
+		}
+	}
+
+	return p.gram.Action(p.stateStack.top(), term)
+}
+
+// lookupActionOnError returns the ACTION entry for the error symbol in the
+// current state. The entry must be a shift (negative); anything else means
+// the parsing table is inconsistent and is reported as an error.
+func (p *Parser) lookupActionOnError() (int, error) {
+	act := p.gram.Action(p.stateStack.top(), p.gram.Error())
+	if act >= 0 {
+		return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error()))
+	}
+
+	return act, nil
+}
+
+// shift pushes nextState onto the state stack.
+func (p *Parser) shift(nextState int) {
+	p.stateStack.push(nextState)
+}
+
+// reduce applies production prodNum to the state stack. It returns true when
+// the production is the start production, i.e. the input is accepted.
+func (p *Parser) reduce(prodNum int) bool {
+	lhs := p.gram.LHS(prodNum)
+	if lhs == p.gram.LHS(p.gram.StartProduction()) {
+		return true
+	}
+	n := p.gram.AlternativeSymbolCount(prodNum)
+	p.stateStack.pop(n)
+	nextState := p.gram.GoTo(p.stateStack.top(), lhs)
+	p.stateStack.push(nextState)
+	return false
+}
+
+// trapError pops states until one that can shift the error symbol is on top.
+// It returns the number of popped states and true on success, or (0, false)
+// when the stack is exhausted down to the initial state without finding an
+// error-trapper state.
+func (p *Parser) trapError() (int, bool) {
+	popped := 0
+	for {
+		if p.gram.ErrorTrapperState(p.stateStack.top()) {
+			return popped, true
+		}
+		if p.stateStack.top() == p.gram.InitialState() {
+			return 0, false
+		}
+		p.stateStack.pop(1)
+		popped++
+	}
+}
+
+// SyntaxErrors returns all syntax errors the parser has detected so far.
+func (p *Parser) SyntaxErrors() []*SyntaxError {
+	return p.synErrs
+}
+
+// searchLookahead returns the names (aliases when defined) of the terminals
+// acceptable in state. The parser uses it to populate the ExpectedTerminals
+// field of a SyntaxError.
+//
+// Fixed: the state parameter was ignored and the top of the state stack was
+// re-read instead. The sole caller passes p.stateStack.top(), so behavior is
+// unchanged, but the function now honors its own argument. Note that the LAC
+// path still explores from the current stack, whose top equals state at the
+// call site.
+func (p *Parser) searchLookahead(state int) []string {
+	kinds := []string{}
+	termCount := p.gram.TerminalCount()
+	for term := 0; term < termCount; term++ {
+		// We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol
+		// intentionally.
+		if term == p.gram.Error() {
+			continue
+		}
+
+		if p.disableLAC {
+			if p.gram.Action(state, term) == 0 {
+				continue
+			}
+		} else {
+			if !p.validateLookahead(term) {
+				continue
+			}
+		}
+
+		if alias := p.gram.TerminalAlias(term); alias != "" {
+			kinds = append(kinds, alias)
+		} else {
+			kinds = append(kinds, p.gram.Terminal(term))
+		}
+	}
+
+	return kinds
+}
+
+// stateStack is the parser's LR state stack. It additionally keeps a scratch
+// copy (itemsExp) that validateLookahead uses to simulate reductions without
+// modifying the real stack.
+type stateStack struct {
+	items    []int
+	itemsExp []int
+}
+
+// enableExploratoryMode snapshots the current stack into the scratch copy.
+func (s *stateStack) enableExploratoryMode() {
+	s.itemsExp = make([]int, len(s.items))
+	copy(s.itemsExp, s.items)
+}
+
+// disableExploratoryMode releases the scratch copy.
+func (s *stateStack) disableExploratoryMode() {
+	s.itemsExp = nil
+}
+
+// top returns the state on top of the real stack.
+func (s *stateStack) top() int {
+	return s.items[len(s.items)-1]
+}
+
+// topExploratorily returns the state on top of the scratch stack.
+func (s *stateStack) topExploratorily() int {
+	return s.itemsExp[len(s.itemsExp)-1]
+}
+
+// push pushes state onto the real stack.
+func (s *stateStack) push(state int) {
+	s.items = append(s.items, state)
+}
+
+// pushExploratorily pushes state onto the scratch stack.
+func (s *stateStack) pushExploratorily(state int) {
+	s.itemsExp = append(s.itemsExp, state)
+}
+
+// pop removes the top n states from the real stack.
+func (s *stateStack) pop(n int) {
+	s.items = s.items[:len(s.items)-n]
+}
+
+// popExploratorily removes the top n states from the scratch stack.
+func (s *stateStack) popExploratorily(n int) {
+	s.itemsExp = s.itemsExp[:len(s.itemsExp)-n]
+}
+
+// grammarImpl holds the generated LR parsing tables for the tree grammar.
+type grammarImpl struct {
+	recoverProductions      []int
+	action                  []int
+	goTo                    []int
+	alternativeSymbolCounts []int
+	errorTrapperStates      []int
+	nonTerminals            []string
+	lhsSymbols              []int
+	terminals               []string
+	terminalAliases         []string
+	astActions              [][]int
+}
+
+// NewGrammar returns the generated grammar tables. The table contents are
+// machine-generated; do not edit them by hand.
+func NewGrammar() *grammarImpl {
+	return &grammarImpl{
+		recoverProductions: []int{
+			0, 0, 0, 1, 0, 0, 0,
+		},
+		// ACTION table, row-major with 7 terminals per state; negative
+		// entries are shifts, positive are reductions, 0 is error.
+		action: []int{
+			0, 0, 0, 0, -2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0,
+			-4, 0, 0, 0, 0, 0, -5, 0, 0, 6, 0, 0, -2, 6, 0, 0, 3, 0, 0, 3,
+			3, 0, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, -2, -9, 0, 0, 0, 0, 0,
+			4, 4, 0, 0, 2, 0, 0, 2, 2, 0,
+		},
+		// GOTO table, row-major with 4 non-terminals per state.
+		goTo: []int{
+			0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7,
+			0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		},
+		alternativeSymbolCounts: []int{
+			0, 1, 4, 3, 2, 1, 0,
+		},
+		errorTrapperStates: []int{
+			0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+		},
+		nonTerminals: []string{
+			"",
+			"tree'",
+			"tree",
+			"tree_list",
+		},
+		lhsSymbols: []int{
+			0, 1, 2, 2, 3, 3, 3,
+		},
+		terminals: []string{
+			"",
+			"<eof>",
+			"error",
+			"ws",
+			"l_paren",
+			"r_paren",
+			"identifier",
+		},
+		terminalAliases: []string{
+			"",
+			"",
+			"",
+			"",
+			"(",
+			")",
+			"",
+		},
+		astActions: [][]int{
+			nil,
+			nil,
+			{
+				2, -3,
+			},
+			{
+				2,
+			},
+			{
+				-1, 2,
+			},
+			nil,
+			nil,
+		},
+	}
+}
+
+// Class returns the class of the grammar ("lalr").
+func (g *grammarImpl) Class() string {
+	return "lalr"
+}
+
+// InitialState returns the parser's initial state.
+func (g *grammarImpl) InitialState() int {
+	return 0
+}
+
+// StartProduction returns the start production number.
+func (g *grammarImpl) StartProduction() int {
+	return 1
+}
+
+// RecoverProduction reports whether prod carries the #recover directive.
+func (g *grammarImpl) RecoverProduction(prod int) bool {
+	return g.recoverProductions[prod] != 0
+}
+
+// Action returns the ACTION entry for (state, terminal); the row width 7
+// equals TerminalCount.
+func (g *grammarImpl) Action(state int, terminal int) int {
+	return g.action[state*7+terminal]
+}
+
+// GoTo returns the GOTO entry for (state, lhs); the row width 4 is the
+// non-terminal count.
+func (g *grammarImpl) GoTo(state int, lhs int) int {
+	return g.goTo[state*4+lhs]
+}
+
+// AlternativeSymbolCount returns the RHS symbol count of prod.
+func (g *grammarImpl) AlternativeSymbolCount(prod int) int {
+	return g.alternativeSymbolCounts[prod]
+}
+
+// TerminalCount returns the number of terminal symbols.
+func (g *grammarImpl) TerminalCount() int {
+	return 7
+}
+
+// ErrorTrapperState reports whether state can shift the error symbol.
+func (g *grammarImpl) ErrorTrapperState(state int) bool {
+	return g.errorTrapperStates[state] != 0
+}
+
+// NonTerminal returns the name of a non-terminal symbol.
+func (g *grammarImpl) NonTerminal(nonTerminal int) string {
+	return g.nonTerminals[nonTerminal]
+}
+
+// LHS returns the LHS symbol of prod.
+func (g *grammarImpl) LHS(prod int) int {
+	return g.lhsSymbols[prod]
+}
+
+// EOF returns the EOF terminal symbol.
+func (g *grammarImpl) EOF() int {
+	return 1
+}
+
+// Error returns the error terminal symbol.
+func (g *grammarImpl) Error() int {
+	return 2
+}
+
+// Terminal returns the name of a terminal symbol.
+func (g *grammarImpl) Terminal(terminal int) string {
+	return g.terminals[terminal]
+}
+
+// TerminalAlias returns the alias of a terminal, or "" when it has none.
+func (g *grammarImpl) TerminalAlias(terminal int) string {
+	return g.terminalAliases[terminal]
+}
+
+// ASTAction returns the #ast directive entries of prod, or nil.
+func (g *grammarImpl) ASTAction(prod int) []int {
+	return g.astActions[prod]
+}
+
+// vToken adapts a lexer Token to the VToken interface the parser consumes.
+type vToken struct {
+	terminalID int
+	skip       bool
+	tok        *Token
+}
+
+// TerminalID returns the terminal ID of the token.
+func (t *vToken) TerminalID() int {
+	return t.terminalID
+}
+
+// Lexeme returns the raw lexeme bytes.
+func (t *vToken) Lexeme() []byte {
+	return t.tok.Lexeme
+}
+
+// EOF reports whether the token represents end of input.
+func (t *vToken) EOF() bool {
+	return t.tok.EOF
+}
+
+// Invalid reports whether the token is lexically invalid.
+func (t *vToken) Invalid() bool {
+	return t.tok.Invalid
+}
+
+// Skip reports whether the token is skipped during syntax analysis.
+func (t *vToken) Skip() bool {
+	return t.skip
+}
+
+// Position returns the (row, column) of the token.
+func (t *vToken) Position() (int, int) {
+	return t.tok.Row, t.tok.Col
+}
+
+// kindToTerminal maps lexical kind IDs to the grammar's terminal IDs.
+var kindToTerminal = []int{
+	0, 3, 4, 5, 6,
+}
+
+// skip marks, per lexical kind ID, whether tokens of that kind are skipped
+// during syntax analysis (non-zero means skip).
+var skip = []int{
+	0, 1, 0, 0, 0,
+}
+
+// tokenStream adapts the generated lexer to the TokenStream interface.
+type tokenStream struct {
+	lex            *Lexer
+	kindToTerminal []int
+	skip           []int
+}
+
+// NewTokenStream returns a TokenStream reading from src.
+//
+// Fixed: the kindToTerminal and skip fields were declared but never
+// initialized; they are now populated from the generated package-level
+// tables so Next can rely on the struct's own state.
+func NewTokenStream(src io.Reader) (*tokenStream, error) {
+	lex, err := NewLexer(NewLexSpec(), src)
+	if err != nil {
+		return nil, err
+	}
+
+	return &tokenStream{
+		lex:            lex,
+		kindToTerminal: kindToTerminal,
+		skip:           skip,
+	}, nil
+}
+
+// Next returns the next token wrapped as a VToken.
+func (t *tokenStream) Next() (VToken, error) {
+	tok, err := t.lex.Next()
+	if err != nil {
+		return nil, err
+	}
+	return &vToken{
+		terminalID: t.kindToTerminal[tok.KindID],
+		skip:       t.skip[tok.KindID] > 0,
+		tok:        tok,
+	}, nil
+}
diff --git a/spec/test/tree_semantic_action.go b/spec/test/tree_semantic_action.go
new file mode 100644
index 0000000..9e240c2
--- /dev/null
+++ b/spec/test/tree_semantic_action.go
@@ -0,0 +1,353 @@
+// Code generated by vartan-go. DO NOT EDIT.
+package test
+
+import (
+ "encoding/json"
+ "fmt"
+ "io"
+)
+
+// SemanticActionSet is a set of semantic actions a parser calls.
+type SemanticActionSet interface {
+	// Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol.
+	// When the parser recovered from an error state by shifting the token, `recovered` is true.
+	Shift(tok VToken, recovered bool)
+
+	// Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production.
+	// When the parser recovered from an error state by reducing the production, `recovered` is true.
+	Reduce(prodNum int, recovered bool)
+
+	// Accept runs when the parser accepts an input.
+	Accept()
+
+	// TrapAndShiftError runs when the parser traps a syntax error and shifts an error symbol onto the state stack.
+	// `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards
+	// from the state stack.
+	// Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
+	// corresponding to the error symbol doesn't exist.
+	TrapAndShiftError(cause VToken, popped int)
+
+	// MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error.
+	MissError(cause VToken)
+}
+
+var _ SemanticActionSet = &SyntaxTreeActionSet{}
+
+// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface.
+type SyntaxTreeNode interface {
+	// ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast`
+	// directive with `...` operator.
+	ChildCount() int
+
+	// ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast`
+	// directive with `...` operator.
+	ExpandChildren() []SyntaxTreeNode
+}
+
+var _ SyntaxTreeNode = &Node{}
+
+// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types.
+// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface.
+type SyntaxTreeBuilder interface {
+	Shift(kindName string, text string, row, col int) SyntaxTreeNode
+	ShiftError(kindName string) SyntaxTreeNode
+	Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode
+	Accept(f SyntaxTreeNode)
+}
+
+var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{}
+
+// DefaulSyntaxTreeBuilder is an implementation of SyntaxTreeBuilder that
+// builds a tree of *Node values.
+// NOTE(review): the missing 't' in the type name is kept as generated for
+// API compatibility.
+type DefaulSyntaxTreeBuilder struct {
+	tree *Node
+}
+
+// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder.
+func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder {
+	return &DefaulSyntaxTreeBuilder{}
+}
+
+// Shift is an implementation of SyntaxTreeBuilder.Shift.
+func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode {
+	return &Node{
+		Type:     NodeTypeTerminal,
+		KindName: kindName,
+		Text:     text,
+		Row:      row,
+		Col:      col,
+	}
+}
+
+// ShiftError is an implementation of SyntaxTreeBuilder.ShiftError.
+func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode {
+	return &Node{
+		Type:     NodeTypeError,
+		KindName: kindName,
+	}
+}
+
+// Reduce is an implementation of SyntaxTreeBuilder.Reduce.
+func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode {
+	cNodes := make([]*Node, len(children))
+	for i, c := range children {
+		cNodes[i] = c.(*Node)
+	}
+	return &Node{
+		Type:     NodeTypeNonTerminal,
+		KindName: kindName,
+		Children: cNodes,
+	}
+}
+
+// Accept is an implementation of SyntaxTreeBuilder.Accept.
+func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) {
+	b.tree = f.(*Node)
+}
+
+// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil.
+func (b *DefaulSyntaxTreeBuilder) Tree() *Node {
+	return b.tree
+}
+
+// SyntaxTreeActionSet is an implementation of SemanticActionSet interface and constructs a syntax tree.
+type SyntaxTreeActionSet struct {
+	gram             Grammar
+	builder          SyntaxTreeBuilder
+	semStack         *semanticStack
+	disableASTAction bool
+}
+
+// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree).
+// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them.
+func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
+	return &SyntaxTreeActionSet{
+		gram:     gram,
+		builder:  builder,
+		semStack: newSemanticStack(),
+	}
+}
+
+// NewCSTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree).
+// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them.
+func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
+	return &SyntaxTreeActionSet{
+		gram:             gram,
+		builder:          builder,
+		semStack:         newSemanticStack(),
+		disableASTAction: true,
+	}
+}
+
+// Shift is an implementation of SemanticActionSet.Shift method.
+// `recovered` is unused by this implementation.
+func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
+	term := a.tokenToTerminal(tok)
+	row, col := tok.Position()
+	a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col))
+}
+
+// Reduce is an implementation of SemanticActionSet.Reduce method.
+// It pops the RHS nodes of production prodNum off the semantic stack,
+// arranges children according to the production's `#ast` action (when AST
+// actions are enabled), and pushes the node the builder returns.
+func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
+	lhs := a.gram.LHS(prodNum)
+
+	// When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
+	n := a.gram.AlternativeSymbolCount(prodNum)
+	handle := a.semStack.pop(n)
+
+	var astAct []int
+	if !a.disableASTAction {
+		astAct = a.gram.ASTAction(prodNum)
+	}
+	var children []SyntaxTreeNode
+	if astAct != nil {
+		// Count the number of children in advance to avoid frequent growth in a slice for children.
+		{
+			l := 0
+			for _, e := range astAct {
+				if e > 0 {
+					l++
+				} else {
+					// A negative entry -i means "expand the children of the
+					// i-th RHS element" (the `...` operator).
+					offset := e*-1 - 1
+					l += handle[offset].ChildCount()
+				}
+			}
+
+			children = make([]SyntaxTreeNode, l)
+		}
+
+		p := 0
+		for _, e := range astAct {
+			if e > 0 {
+				// A positive entry i selects the i-th RHS element itself.
+				offset := e - 1
+				children[p] = handle[offset]
+				p++
+			} else {
+				offset := e*-1 - 1
+				for _, c := range handle[offset].ExpandChildren() {
+					children[p] = c
+					p++
+				}
+			}
+		}
+	} else {
+		// If an alternative has no AST action, a driver generates
+		// a node with the same structure as a CST.
+		children = handle
+	}
+
+	a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children))
+}
+
+// Accept is an implementation of SemanticActionSet.Accept method.
+func (a *SyntaxTreeActionSet) Accept() {
+	top := a.semStack.pop(1)
+	a.builder.Accept(top[0])
+}
+
+// TrapAndShiftError is an implementation of SemanticActionSet.TrapAndShiftError method.
+// It discards the popped frames and pushes an error node in their place.
+func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) {
+	a.semStack.pop(popped)
+	a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error())))
+}
+
+// MissError is an implementation of SemanticActionSet.MissError method.
+// This implementation intentionally does nothing.
+func (a *SyntaxTreeActionSet) MissError(cause VToken) {
+}
+
+// tokenToTerminal maps tok to the terminal symbol the grammar defines.
+func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
+	if tok.EOF() {
+		return a.gram.EOF()
+	}
+
+	return tok.TerminalID()
+}
+
+// semanticStack holds the syntax-tree fragments corresponding to the symbols
+// currently on the parser's state stack.
+type semanticStack struct {
+	frames []SyntaxTreeNode
+}
+
+func newSemanticStack() *semanticStack {
+	return &semanticStack{
+		// Pre-allocate some capacity to avoid frequent reallocation.
+		frames: make([]SyntaxTreeNode, 0, 100),
+	}
+}
+
+func (s *semanticStack) push(f SyntaxTreeNode) {
+	s.frames = append(s.frames, f)
+}
+
+// pop removes the top n frames and returns them in stack order.
+// NOTE(review): the returned slice aliases the stack's backing array, so a
+// later push may overwrite its contents; callers must consume the frames
+// before pushing again (the current callers do).
+func (s *semanticStack) pop(n int) []SyntaxTreeNode {
+	fs := s.frames[len(s.frames)-n:]
+	s.frames = s.frames[:len(s.frames)-n]
+
+	return fs
+}
+
+// NodeType distinguishes the three kinds of nodes a syntax tree can contain.
+type NodeType int
+
+// Fixed: the constants were untyped ints; they are now typed NodeType
+// values (same numeric values 0, 1, 2 as before).
+const (
+	// NodeTypeError is a node generated while recovering from a syntax
+	// error; it corresponds to the error symbol.
+	NodeTypeError NodeType = iota
+	// NodeTypeTerminal is a leaf node holding a token.
+	NodeTypeTerminal
+	// NodeTypeNonTerminal is an interior node produced by a reduction.
+	NodeTypeNonTerminal
+)
+
+// Node is an implementation of SyntaxTreeNode interface.
+type Node struct {
+	Type     NodeType
+	KindName string
+	Text     string
+	Row      int
+	Col      int
+	Children []*Node
+}
+
+// MarshalJSON implements json.Marshaler. Each node type serializes only the
+// fields that are meaningful for it.
+func (n *Node) MarshalJSON() ([]byte, error) {
+	switch n.Type {
+	case NodeTypeError:
+		return json.Marshal(struct {
+			Type     NodeType `json:"type"`
+			KindName string   `json:"kind_name"`
+		}{
+			Type:     n.Type,
+			KindName: n.KindName,
+		})
+	case NodeTypeTerminal:
+		return json.Marshal(struct {
+			Type     NodeType `json:"type"`
+			KindName string   `json:"kind_name"`
+			Text     string   `json:"text"`
+			Row      int      `json:"row"`
+			Col      int      `json:"col"`
+		}{
+			Type:     n.Type,
+			KindName: n.KindName,
+			Text:     n.Text,
+			Row:      n.Row,
+			Col:      n.Col,
+		})
+	case NodeTypeNonTerminal:
+		return json.Marshal(struct {
+			Type     NodeType `json:"type"`
+			KindName string   `json:"kind_name"`
+			Children []*Node  `json:"children"`
+		}{
+			Type:     n.Type,
+			KindName: n.KindName,
+			Children: n.Children,
+		})
+	default:
+		return nil, fmt.Errorf("invalid node type: %v", n.Type)
+	}
+}
+
+// ChildCount is an implementation of SyntaxTreeNode.ChildCount.
+func (n *Node) ChildCount() int {
+	return len(n.Children)
+}
+
+// ExpandChildren is an implementation of SyntaxTreeNode.ExpandChildren. It
+// returns the children widened to the SyntaxTreeNode interface type.
+func (n *Node) ExpandChildren() []SyntaxTreeNode {
+	nodes := make([]SyntaxTreeNode, 0, len(n.Children))
+	for _, child := range n.Children {
+		nodes = append(nodes, child)
+	}
+	return nodes
+}
+
+// PrintTree prints a syntax tree whose root is `node`.
+func PrintTree(w io.Writer, node *Node) {
+	printTree(w, node, "", "")
+}
+
+// printTree writes node and its descendants to w with box-drawing ruled
+// lines. ruledLine is the prefix of the node's own line;
+// childRuledLinePrefix is prepended to the prefixes of its children.
+func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) {
+	if node == nil {
+		return
+	}
+
+	switch node.Type {
+	case NodeTypeError:
+		fmt.Fprintf(w, "%v!%v\n", ruledLine, node.KindName)
+	case NodeTypeTerminal:
+		fmt.Fprintf(w, "%v%v %#v\n", ruledLine, node.KindName, node.Text)
+	case NodeTypeNonTerminal:
+		fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
+
+		num := len(node.Children)
+		for i, child := range node.Children {
+			// Use a tee connector for every child except the last one.
+			var line string
+			if num > 1 && i < num-1 {
+				line = "├─ "
+			} else {
+				line = "└─ "
+			}
+
+			// Continue the vertical rule only while siblings follow.
+			var prefix string
+			if i >= num-1 {
+				prefix = " "
+			} else {
+				prefix = "│ "
+			}
+
+			printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
+		}
+	}
+}
diff --git a/tester/tester.go b/tester/tester.go
new file mode 100644
index 0000000..ef3ca61
--- /dev/null
+++ b/tester/tester.go
@@ -0,0 +1,177 @@
+package tester
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "path/filepath"
+ "runtime/debug"
+ "strings"
+
+ "github.com/nihei9/vartan/driver"
+ gspec "github.com/nihei9/vartan/spec/grammar"
+ tspec "github.com/nihei9/vartan/spec/test"
+)
+
+// TestResult holds the outcome of running one test case.
+type TestResult struct {
+	TestCasePath string
+	Error        error
+	Diffs        []*tspec.TreeDiff
+}
+
+// String formats the result for display: "Passed <path>" on success, or a
+// multi-line failure report with the error message and any tree diffs.
+func (r *TestResult) String() string {
+	if r.Error != nil {
+		const indent1 = " "
+		const indent2 = indent1 + indent1
+
+		msgLines := strings.Split(r.Error.Error(), "\n")
+		msg := fmt.Sprintf("Failed %v:\n%v%v", r.TestCasePath, indent1, strings.Join(msgLines, "\n"+indent1))
+		if len(r.Diffs) == 0 {
+			return msg
+		}
+		var diffLines []string
+		for _, diff := range r.Diffs {
+			diffLines = append(diffLines, diff.Message)
+			diffLines = append(diffLines, fmt.Sprintf("%vexpected path: %v", indent1, diff.ExpectedPath))
+			diffLines = append(diffLines, fmt.Sprintf("%vactual path: %v", indent1, diff.ActualPath))
+		}
+		return fmt.Sprintf("%v\n%v%v", msg, indent2, strings.Join(diffLines, "\n"+indent2))
+	}
+	return fmt.Sprintf("Passed %v", r.TestCasePath)
+}
+
+// TestCaseWithMetadata pairs a parsed test case with its file path and any
+// error that occurred while loading it.
+type TestCaseWithMetadata struct {
+	TestCase *tspec.TestCase
+	FilePath string
+	Error    error
+}
+
+// ListTestCases loads test cases from testPath. A file yields a single case;
+// a directory is walked recursively. Load errors are reported per entry
+// rather than aborting the whole walk.
+func ListTestCases(testPath string) []*TestCaseWithMetadata {
+	fi, err := os.Stat(testPath)
+	if err != nil {
+		return []*TestCaseWithMetadata{
+			{
+				FilePath: testPath,
+				Error:    err,
+			},
+		}
+	}
+	if !fi.IsDir() {
+		c, err := parseTestCase(testPath)
+		return []*TestCaseWithMetadata{
+			{
+				TestCase: c,
+				FilePath: testPath,
+				Error:    err,
+			},
+		}
+	}
+
+	es, err := os.ReadDir(testPath)
+	if err != nil {
+		return []*TestCaseWithMetadata{
+			{
+				FilePath: testPath,
+				Error:    err,
+			},
+		}
+	}
+	var cases []*TestCaseWithMetadata
+	for _, e := range es {
+		// Recurse: subdirectories contribute their cases as well.
+		cs := ListTestCases(filepath.Join(testPath, e.Name()))
+		cases = append(cases, cs...)
+	}
+	return cases
+}
+
+// parseTestCase opens and parses a single test-case file.
+func parseTestCase(testCasePath string) (*tspec.TestCase, error) {
+	f, err := os.Open(testCasePath)
+	if err != nil {
+		return nil, err
+	}
+	defer f.Close()
+	return tspec.ParseTestCase(f)
+}
+
+// Tester runs a set of test cases against a compiled grammar.
+type Tester struct {
+	Grammar *gspec.CompiledGrammar
+	Cases   []*TestCaseWithMetadata
+}
+
+// Run executes every test case and returns one result per case.
+func (t *Tester) Run() []*TestResult {
+	var rs []*TestResult
+	for _, c := range t.Cases {
+		rs = append(rs, runTest(t.Grammar, c))
+	}
+	return rs
+}
+
+// runTest parses the test case's source with the compiled grammar, compares
+// the resulting syntax tree against the expected tree, and reports the
+// outcome. A result with a non-nil Error field indicates failure.
+func runTest(g *gspec.CompiledGrammar, c *TestCaseWithMetadata) *TestResult {
+	var p *driver.Parser
+	var tb *driver.DefaulSyntaxTreeBuilder
+	{
+		gram := driver.NewGrammar(g)
+		toks, err := driver.NewTokenStream(g, bytes.NewReader(c.TestCase.Source))
+		if err != nil {
+			return &TestResult{
+				TestCasePath: c.FilePath,
+				Error:        err,
+			}
+		}
+		tb = driver.NewDefaultSyntaxTreeBuilder()
+		p, err = driver.NewParser(toks, gram, driver.SemanticAction(driver.NewASTActionSet(gram, tb)))
+		if err != nil {
+			return &TestResult{
+				TestCasePath: c.FilePath,
+				Error:        err,
+			}
+		}
+	}
+
+	err := p.Parse()
+	if err != nil {
+		return &TestResult{
+			TestCasePath: c.FilePath,
+			Error:        err,
+		}
+	}
+
+	if tb.Tree() == nil {
+		var err error
+		if len(p.SyntaxErrors()) > 0 {
+			err = fmt.Errorf("parse tree was not generated: syntax error occurred")
+		} else {
+			// The parser should always generate a parse tree in the vartan-test command, so if there is no parse
+			// tree, it is a bug. We also include a stack trace in the error message to be sure.
+			err = fmt.Errorf("parse tree was not generated: no syntax error:\n%v", string(debug.Stack()))
+		}
+		return &TestResult{
+			TestCasePath: c.FilePath,
+			Error:        err,
+		}
+	}
+
+	// When a parse tree exists, the test continues regardless of whether or not syntax errors occurred.
+	diffs := tspec.DiffTree(genTree(tb.Tree()).Fill(), c.TestCase.Output)
+	if len(diffs) > 0 {
+		return &TestResult{
+			TestCasePath: c.FilePath,
+			Error:        fmt.Errorf("output mismatch"),
+			Diffs:        diffs,
+		}
+	}
+	return &TestResult{
+		TestCasePath: c.FilePath,
+	}
+}
+
+func genTree(dTree *driver.Node) *tspec.Tree {
+ var children []*tspec.Tree
+ if len(dTree.Children) > 0 {
+ children = make([]*tspec.Tree, len(dTree.Children))
+ for i, c := range dTree.Children {
+ children[i] = genTree(c)
+ }
+ }
+ return tspec.NewTree(dTree.KindName, children...)
+}
diff --git a/tester/tester_test.go b/tester/tester_test.go
new file mode 100644
index 0000000..957d739
--- /dev/null
+++ b/tester/tester_test.go
@@ -0,0 +1,170 @@
+package tester
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/grammar"
+ gspec "github.com/nihei9/vartan/spec/grammar"
+ tspec "github.com/nihei9/vartan/spec/test"
+)
+
+// TestTester_Run checks that Tester.Run passes when the input's syntax tree
+// matches the expected tree (including error-recovery trees) and fails on
+// any structural mismatch.
+func TestTester_Run(t *testing.T) {
+	grammarSrc1 := `
+#name test;
+
+s
+    : foo bar baz
+    | foo error baz #recover
+    ;
+
+ws #skip
+    : "[\u{0009}\u{0020}]+";
+foo
+    : 'foo';
+bar
+    : 'bar';
+baz
+    : 'baz';
+`
+
+	grammarSrc2 := `
+#name test;
+
+s
+    : foos
+    ;
+foos
+    : foos foo #ast foos... foo
+    | foo
+    ;
+
+ws #skip
+    : "[\u{0009}\u{0020}]+";
+foo
+    : 'foo';
+`
+
+	tests := []struct {
+		grammarSrc string
+		testSrc    string
+		error      bool
+	}{
+		{
+			grammarSrc: grammarSrc1,
+			testSrc: `
+Test
+---
+foo bar baz
+---
+(s
+    (foo) (bar) (baz))
+`,
+		},
+		// Error recovery: '?' is invalid, so the tree contains (error).
+		{
+			grammarSrc: grammarSrc1,
+			testSrc: `
+Test
+---
+foo ? baz
+---
+(s
+    (foo) (error) (baz))
+`,
+		},
+		// Expected tree has too few children.
+		{
+			grammarSrc: grammarSrc1,
+			testSrc: `
+Test
+---
+foo bar baz
+---
+(s)
+`,
+			error: true,
+		},
+		{
+			grammarSrc: grammarSrc1,
+			testSrc: `
+Test
+---
+foo bar baz
+---
+(s
+    (foo) (bar))
+`,
+			error: true,
+		},
+		// Expected tree names a kind that doesn't occur.
+		{
+			grammarSrc: grammarSrc1,
+			testSrc: `
+Test
+---
+foo bar baz
+---
+(s
+    (foo) (bar) (xxx))
+`,
+			error: true,
+		},
+		// #ast directive with the `...` expansion operator.
+		{
+			grammarSrc: grammarSrc2,
+			testSrc: `
+Test
+---
+foo foo foo
+---
+(s
+    (foos
+        (foo) (foo) (foo)))
+`,
+		},
+	}
+	for _, tt := range tests {
+		ast, err := gspec.Parse(strings.NewReader(tt.grammarSrc))
+		if err != nil {
+			t.Fatal(err)
+		}
+		b := grammar.GrammarBuilder{
+			AST: ast,
+		}
+		g, err := b.Build()
+		if err != nil {
+			t.Fatal(err)
+		}
+		cg, _, err := grammar.Compile(g)
+		if err != nil {
+			t.Fatal(err)
+		}
+		c, err := tspec.ParseTestCase(strings.NewReader(tt.testSrc))
+		if err != nil {
+			t.Fatal(err)
+		}
+		tester := &Tester{
+			Grammar: cg,
+			Cases: []*TestCaseWithMetadata{
+				{
+					TestCase: c,
+				},
+			},
+		}
+		rs := tester.Run()
+		if tt.error {
+			errOccurred := false
+			for _, r := range rs {
+				if r.Error != nil {
+					errOccurred = true
+				}
+			}
+			if !errOccurred {
+				t.Fatal("this test must fail, but it passed")
+			}
+		} else {
+			for _, r := range rs {
+				if r.Error != nil {
+					t.Fatalf("unexpected error occurred: %v", r.Error)
+				}
+			}
+		}
+	}
+}