aboutsummaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2024-12-11 16:48:12 -0300
committerEuAndreh <eu@euandre.org>2024-12-11 16:48:12 -0300
commit27b4729bd1a57740ea68e774d58d9cb3f45c5589 (patch)
tree152ff5686ade087e29e102cbbd39c0405cb63c02 /tests
parentConsolidate packages spread across multiple files into single one (diff)
downloadcotia-27b4729bd1a57740ea68e774d58d9cb3f45c5589.tar.gz
cotia-27b4729bd1a57740ea68e774d58d9cb3f45c5589.tar.xz
Do the same single file consolidation on tests
Diffstat (limited to 'tests')
-rw-r--r--tests/unit/compressor.go (renamed from tests/unit/compressor/compressor_test.go)0
-rw-r--r--tests/unit/driver/lexer.go (renamed from tests/unit/driver/lexer/lexer_test.go)0
-rw-r--r--tests/unit/driver/parser.go1972
-rw-r--r--tests/unit/driver/parser/conflict_test.go524
-rw-r--r--tests/unit/driver/parser/lac_test.go120
-rw-r--r--tests/unit/driver/parser/parser_test.go833
-rw-r--r--tests/unit/driver/parser/semantic_action_test.go227
-rw-r--r--tests/unit/driver/parser/syntax_error_test.go306
-rw-r--r--tests/unit/grammar.go (renamed from tests/unit/grammar/grammar_test.go)1266
-rw-r--r--tests/unit/grammar/first_test.go219
-rw-r--r--tests/unit/grammar/lalr1_test.go187
-rw-r--r--tests/unit/grammar/lexical.go (renamed from tests/unit/grammar/lexical/compiler_test.go)0
-rw-r--r--tests/unit/grammar/lexical/dfa.go (renamed from tests/unit/grammar/lexical/dfa/tree_test.go)185
-rw-r--r--tests/unit/grammar/lexical/dfa/dfa_test.go121
-rw-r--r--tests/unit/grammar/lexical/dfa/symbol_position_test.go79
-rw-r--r--tests/unit/grammar/lexical/parser.go (renamed from tests/unit/grammar/lexical/parser/parser_test.go)518
-rw-r--r--tests/unit/grammar/lexical/parser/lexer_test.go524
-rw-r--r--tests/unit/grammar/lr0_test.go448
-rw-r--r--tests/unit/grammar/parsing_table_test.go387
-rw-r--r--tests/unit/grammar/symbol.go (renamed from tests/unit/grammar/symbol/symbol_test.go)0
-rw-r--r--tests/unit/grammar/test_helper_test.go68
-rw-r--r--tests/unit/spec/grammar/parser.go (renamed from tests/unit/spec/grammar/parser/parser_test.go)228
-rw-r--r--tests/unit/spec/grammar/parser/lexer_test.go236
-rw-r--r--tests/unit/spec/test.go (renamed from tests/unit/spec/test/parser_test.go)0
-rw-r--r--tests/unit/tester.go (renamed from tests/unit/tester/tester_test.go)0
-rw-r--r--tests/unit/utf8.go (renamed from tests/unit/utf8/utf8_test.go)0
26 files changed, 4169 insertions, 4279 deletions
diff --git a/tests/unit/compressor/compressor_test.go b/tests/unit/compressor.go
index 621b731..621b731 100644
--- a/tests/unit/compressor/compressor_test.go
+++ b/tests/unit/compressor.go
diff --git a/tests/unit/driver/lexer/lexer_test.go b/tests/unit/driver/lexer.go
index a3d0231..a3d0231 100644
--- a/tests/unit/driver/lexer/lexer_test.go
+++ b/tests/unit/driver/lexer.go
diff --git a/tests/unit/driver/parser.go b/tests/unit/driver/parser.go
new file mode 100644
index 0000000..31fec45
--- /dev/null
+++ b/tests/unit/driver/parser.go
@@ -0,0 +1,1972 @@
+package parser
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+ "testing"
+
+ "urubu/grammar"
+ spec "urubu/spec/grammar"
+ "urubu/spec/grammar/parser"
+)
+
+func TestParserWithConflicts(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ cst *Node
+ }{
+ {
+ caption: "when a shift/reduce conflict occurred, we prioritize the shift action",
+ specSrc: `
+#name test;
+
+expr
+ : expr assign expr
+ | id
+ ;
+
+id: "[A-Za-z0-9_]+";
+assign: '=';
+`,
+ src: `foo=bar=baz`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "foo"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "bar"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ termNode("id", "baz"),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar",
+ specSrc: `
+#name test;
+
+s
+ : a
+ | b
+ ;
+a
+ : id
+ ;
+b
+ : id
+ ;
+
+id: "[A-Za-z0-9_]+";
+`,
+ src: `foo`,
+ cst: nonTermNode("s",
+ nonTermNode("a",
+ termNode("id", "foo"),
+ ),
+ ),
+ },
+ {
+ caption: "left associativities defined earlier in the grammar have higher precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #left mul
+ #left add
+);
+
+expr
+ : expr add expr
+ | expr mul expr
+ | id
+ ;
+
+id: "[A-Za-z0-9_]+";
+add: '+';
+mul: '*';
+`,
+ src: `a+b*c*d+e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ },
+ {
+ caption: "left associativities defined in the same line have the same precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | id
+ ;
+
+id: "[A-Za-z0-9_]+";
+add: '+';
+sub: '-';
+`,
+ src: `a-b+c+d-e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ },
+ {
+ caption: "right associativities defined earlier in the grammar have higher precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #right r1
+ #right r2
+);
+
+expr
+ : expr r2 expr
+ | expr r1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+r1
+ : 'r1';
+r2
+ : 'r2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a r2 b r1 c r1 d r2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "right associativities defined in the same line have the same precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #right r1 r2
+);
+
+expr
+ : expr r2 expr
+ | expr r1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+r1
+ : 'r1';
+r2
+ : 'r2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a r2 b r1 c r1 d r2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("r1", "r1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ termNode("r2", "r2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign a1
+ #assign a2
+);
+
+expr
+ : expr a2 expr
+ | expr a1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+a1
+ : 'a1';
+a2
+ : 'a2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a a2 b a1 c a1 d a2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "terminal symbols with an #assign directive defined in the same line have the same precedence",
+ specSrc: `
+#name test;
+
+#prec (
+ #assign a1 a2
+);
+
+expr
+ : expr a2 expr
+ | expr a1 expr
+ | id
+ ;
+
+whitespaces #skip
+ : "[\u{0009}\u{0020}]+";
+a1
+ : 'a1';
+a2
+ : 'a2';
+id
+ : "[A-Za-z0-9_]+";
+`,
+ src: `a a2 b a1 c a1 d a2 e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ termNode("a1", "a1"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ termNode("a2", "a2"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ caption: "#left, #right, and #assign can be mixed",
+ specSrc: `
+#name test;
+
+#prec (
+ #left mul div
+ #left add sub
+ #assign else
+ #assign then
+ #right assign
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | expr assign expr
+ | if expr then expr
+ | if expr then expr else expr
+ | id
+ ;
+
+ws #skip: "[\u{0009}\u{0020}]+";
+if: 'if';
+then: 'then';
+else: 'else';
+id: "[A-Za-z0-9_]+";
+add: '+';
+sub: '-';
+mul: '*';
+div: '/';
+assign: '=';
+`,
+ src: `x = y = a + b * c - d / e + if f then if g then h else i`,
+ cst: nonTermNode(
+ "expr",
+ nonTermNode("expr",
+ termNode("id", "x"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "y"),
+ ),
+ termNode("assign", "="),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "a"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "b"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("id", "c"),
+ ),
+ ),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("id", "d"),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("if", "if"),
+ nonTermNode("expr",
+ termNode("id", "f"),
+ ),
+ termNode("then", "then"),
+ nonTermNode("expr",
+ termNode("if", "if"),
+ nonTermNode("expr",
+ termNode("id", "g"),
+ ),
+ termNode("then", "then"),
+ nonTermNode("expr",
+ termNode("id", "h"),
+ ),
+ termNode("else", "else"),
+ nonTermNode("expr",
+ termNode("id", "i"),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ cg, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram := NewGrammar(cg)
+ tb := NewDefaultSyntaxTreeBuilder()
+ p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if tt.cst != nil {
+ testTree(t, tb.Tree(), tt.cst)
+ }
+ })
+ }
+}
+
+func TestParserWithLAC(t *testing.T) {
+ specSrc := `
+#name test;
+
+s
+ : t t
+ ;
+t
+ : c t
+ | d
+ ;
+
+c: 'c';
+d: 'd';
+`
+
+ src := `ccd`
+
+ actLogWithLAC := []string{
+ "shift/c",
+ "shift/c",
+ "shift/d",
+ "miss",
+ }
+
+ actLogWithoutLAC := []string{
+ "shift/c",
+ "shift/c",
+ "shift/d",
+ "reduce/t",
+ "reduce/t",
+ "reduce/t",
+ "miss",
+ }
+
+ ast, err := parser.Parse(strings.NewReader(specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ t.Run("LAC is enabled", func(t *testing.T) {
+ semAct := &testSemAct{
+ gram: gram,
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(semAct.actLog) != len(actLogWithLAC) {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
+ }
+
+ for i, e := range actLogWithLAC {
+ if semAct.actLog[i] != e {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
+ }
+ }
+ })
+
+ t.Run("LAC is disabled", func(t *testing.T) {
+ semAct := &testSemAct{
+ gram: gram,
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(semAct.actLog) != len(actLogWithoutLAC) {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
+ }
+
+ for i, e := range actLogWithoutLAC {
+ if semAct.actLog[i] != e {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
+ }
+ }
+ })
+}
+
+func termNode(kind string, text string, children ...*Node) *Node {
+ return &Node{
+ Type: NodeTypeTerminal,
+ KindName: kind,
+ Text: text,
+ Children: children,
+ }
+}
+
+func errorNode() *Node {
+ return &Node{
+ Type: NodeTypeError,
+ KindName: "error",
+ }
+}
+
+func nonTermNode(kind string, children ...*Node) *Node {
+ return &Node{
+ Type: NodeTypeNonTerminal,
+ KindName: kind,
+ Children: children,
+ }
+}
+
+func TestParser_Parse(t *testing.T) {
+ tests := []struct {
+ specSrc string
+ src string
+ synErr bool
+ cst *Node
+ ast *Node
+ }{
+ {
+ specSrc: `
+#name test;
+
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+
+add
+ : '+';
+mul
+ : '*';
+l_paren
+ : '(';
+r_paren
+ : ')';
+id
+ : "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ src: `(a+(b+c))*d+e`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("term",
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("l_paren", "("),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "a"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("l_paren", "("),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "b"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "c"),
+ ),
+ ),
+ ),
+ termNode("r_paren", ")"),
+ ),
+ ),
+ ),
+ termNode("r_paren", ")"),
+ ),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("factor",
+ termNode("id", "d"),
+ ),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("term",
+ nonTermNode("factor",
+ termNode("id", "e"),
+ ),
+ ),
+ ),
+ },
+ // Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
+ // not allowed in string literals.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a b c
+ ;
+
+a
+ : '\f{foo}';
+b
+ : '\u{0000}';
+c
+ : '\p{gc=Letter}';
+`,
+ src: `\f{foo}\u{0000}\p{gc=Letter}`,
+ cst: nonTermNode("s",
+ termNode("a", `\f{foo}`),
+ termNode("b", `\u{0000}`),
+ termNode("c", `\p{gc=Letter}`),
+ ),
+ },
+ // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar
+ : bar_text
+ |
+ ;
+bar_text: "bar";
+`,
+ src: ``,
+ cst: nonTermNode("s",
+ nonTermNode("foo"),
+ nonTermNode("bar"),
+ ),
+ },
+ // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar
+ : bar_text
+ |
+ ;
+
+bar_text
+ : "bar";
+`,
+ src: `bar`,
+ cst: nonTermNode("s",
+ nonTermNode("foo"),
+ nonTermNode("bar",
+ termNode("bar_text", "bar"),
+ ),
+ ),
+ },
+ // A production can have multiple alternative productions.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $uminus
+ #left mul div
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | int
+ | sub int #prec $uminus // This 'sub' means the unary minus symbol.
+ ;
+
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+sub
+ : '-';
+mul
+ : '*';
+div
+ : '/';
+`,
+ src: `-1*-2+3-4/5`,
+ ast: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "1"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "2"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "3"),
+ ),
+ ),
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("int", "4"),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("int", "5"),
+ ),
+ ),
+ ),
+ },
+ // A lexical production can have multiple production directives.
+ {
+ specSrc: `
+#name test;
+
+s
+ : push_a push_b pop pop
+ ;
+
+push_a #mode default #push a
+ : '->a';
+push_b #mode a #push b
+ : '->b';
+pop #mode a b #pop
+ : '<-';
+`,
+ src: `->a->b<-<-`,
+ ast: nonTermNode("s",
+ termNode("push_a", "->a"),
+ termNode("push_b", "->b"),
+ termNode("pop", "<-"),
+ termNode("pop", "<-"),
+ ),
+ },
+ {
+ specSrc: `
+#name test;
+
+mode_tran_seq
+ : mode_tran_seq mode_tran
+ | mode_tran
+ ;
+mode_tran
+ : push_m1
+ | push_m2
+ | pop_m1
+ | pop_m2
+ ;
+
+push_m1 #push m1
+ : "->";
+push_m2 #mode m1 #push m2
+ : "-->";
+pop_m1 #mode m1 #pop
+ : "<-";
+pop_m2 #mode m2 #pop
+ : "<--";
+whitespace #mode default m1 m2 #skip
+ : "\u{0020}+";
+`,
+ src: ` -> --> <-- <- `,
+ },
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo
+ : "foo";
+bar #mode default
+ : "bar";
+`,
+ src: `foobar`,
+ },
+ // When #push and #pop are applied to the same symbol, #pop will run first, then #push.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push m1
+ : 'foo';
+bar #mode m1 #pop #push m2
+ : 'bar';
+baz #mode m2
+ : 'baz';
+`,
+ src: `foobarbaz`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ termNode("bar", "bar"),
+ termNode("baz", "baz"),
+ ),
+ },
+ // When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first
+ // in a definition. That is, the order in which #push and #pop appear in grammar has nothing to do with the order in which
+ // they are executed.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar baz
+ ;
+
+foo #push m1
+ : 'foo';
+bar #mode m1 #push m2 #pop
+ : 'bar';
+baz #mode m2
+ : 'baz';
+`,
+ src: `foobarbaz`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ termNode("bar", "bar"),
+ termNode("baz", "baz"),
+ ),
+ },
+ // The parser can skip specified tokens.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo bar
+ ;
+
+foo
+ : "foo";
+bar
+ : "bar";
+white_space #skip
+ : "[\u{0009}\u{0020}]+";
+`,
+ src: `foo bar`,
+ },
+ // A grammar can contain fragments.
+ {
+ specSrc: `
+#name test;
+
+s
+ : tagline
+ ;
+tagline
+ : "\f{words} IS OUT THERE.";
+fragment words
+ : "[A-Za-z\u{0020}]+";
+`,
+ src: `THE TRUTH IS OUT THERE.`,
+ },
+ // A grammar can contain ast actions.
+ {
+ specSrc: `
+#name test;
+
+list
+ : l_bracket elems r_bracket #ast elems...
+ ;
+elems
+ : elems comma id #ast elems... id
+ | id
+ ;
+
+whitespace #skip
+ : "\u{0020}+";
+l_bracket
+ : '[';
+r_bracket
+ : ']';
+comma
+ : ',';
+id
+ : "[A-Za-z]+";
+`,
+ src: `[Byers, Frohike, Langly]`,
+ cst: nonTermNode("list",
+ termNode("x_1", "["),
+ nonTermNode("elems",
+ nonTermNode("elems",
+ nonTermNode("elems",
+ termNode("id", "Byers"),
+ ),
+ termNode("x_3", ","),
+ termNode("id", "Frohike"),
+ ),
+ termNode("x_3", ","),
+ termNode("id", "Langly"),
+ ),
+ termNode("x_2", "]"),
+ ),
+ ast: nonTermNode("list",
+ termNode("id", "Byers"),
+ termNode("id", "Frohike"),
+ termNode("id", "Langly"),
+ ),
+ },
+ // The '...' operator can expand child nodes.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a #ast a...
+ ;
+a
+ : a comma foo #ast a... foo
+ | foo
+ ;
+
+comma
+ : ',';
+foo
+ : 'foo';
+`,
+ src: `foo,foo,foo`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ termNode("foo", "foo"),
+ termNode("foo", "foo"),
+ ),
+ },
+ // The '...' operator can also be applied to an element having no children.
+ {
+ specSrc: `
+#name test;
+
+s
+ : a semi_colon #ast a...
+ ;
+a
+ :
+ ;
+
+semi_colon
+ : ';';
+`,
+ src: `;`,
+ ast: nonTermNode("s"),
+ },
+ // A label can be a parameter of #ast directive.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #left add sub
+);
+
+expr
+ : expr@lhs add expr@rhs #ast add lhs rhs
+ | expr@lhs sub expr@rhs #ast sub lhs rhs
+ | num
+ ;
+
+add
+ : '+';
+sub
+ : '-';
+num
+ : "0|[1-9][0-9]*";
+`,
+ src: `1+2-3`,
+ ast: nonTermNode("expr",
+ termNode("sub", "-"),
+ nonTermNode("expr",
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("num", "1"),
+ ),
+ nonTermNode("expr",
+ termNode("num", "2"),
+ ),
+ ),
+ nonTermNode("expr",
+ termNode("num", "3"),
+ ),
+ ),
+ },
+ // An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo@x semi_colon #ast foo
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `foo;`,
+ ast: nonTermNode("s",
+ termNode("foo", "foo"),
+ ),
+ },
+ // A production has the same precedence and associativity as the right-most terminal symbol.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #left add
+);
+
+expr
+ : expr add expr // This alternative has the same precedence and associativiry as 'add'.
+ | int
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+`,
+ // This source is recognized as the following structure because the production `expr → expr add expr` has the same
+ // precedence and associativity as the symbol 'add'.
+ //
+ // ((1+2)+3)
+ //
+ // If the symbol doesn't have the precedence and left associativity, the production also doesn't have the precedence
+ // and associativity and this source will be recognized as the following structure.
+ //
+ // (1+(2+3))
+ src: `1+2+3`,
+ ast: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("int", "1"),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "2"),
+ ),
+ ),
+ termNode("add", "+"),
+ nonTermNode("expr",
+ termNode("int", "3"),
+ ),
+ ),
+ },
+ // The 'prec' directive can set precedence of a production.
+ {
+ specSrc: `
+#name test;
+
+#prec (
+ #assign $uminus
+ #left mul div
+ #left add sub
+);
+
+expr
+ : expr add expr
+ | expr sub expr
+ | expr mul expr
+ | expr div expr
+ | int
+ | sub int #prec $uminus // This 'sub' means a unary minus symbol.
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+int
+ : "0|[1-9][0-9]*";
+add
+ : '+';
+sub
+ : '-';
+mul
+ : '*';
+div
+ : '/';
+`,
+ // This source is recognized as the following structure because the production `expr → sub int`
+ // has the `#prec $uminus` directive, and `$uminus` has a higher precedence than the symbol `mul`.
+ //
+ // (((-1) * 20) / 5)
+ //
+ // If the production doesn't have the `#prec` directive, this source will be recognized as
+ // the following structure.
+ //
+ // (- ((1 * 20) / 5))
+ src: `-1*20/5`,
+ cst: nonTermNode("expr",
+ nonTermNode("expr",
+ nonTermNode("expr",
+ termNode("sub", "-"),
+ termNode("int", "1"),
+ ),
+ termNode("mul", "*"),
+ nonTermNode("expr",
+ termNode("int", "20"),
+ ),
+ ),
+ termNode("div", "/"),
+ nonTermNode("expr",
+ termNode("int", "5"),
+ ),
+ ),
+ },
+ // The grammar can contain the 'error' symbol.
+ {
+ specSrc: `
+#name test;
+
+s
+ : id id id semi_colon
+ | error semi_colon
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+id
+ : "[A-Za-z_]+";
+`,
+ src: `foo bar baz ;`,
+ },
+ // The 'error' symbol can appear in an #ast directive.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo semi_colon
+ | error semi_colon #ast error
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `bar;`,
+ synErr: true,
+ ast: nonTermNode("s",
+ errorNode(),
+ ),
+ },
+ // The 'error' symbol can have a label, and an #ast can reference it.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo semi_colon
+ | error@e semi_colon #ast e
+ ;
+
+semi_colon
+ : ';';
+foo
+ : 'foo';
+`,
+ src: `bar;`,
+ synErr: true,
+ ast: nonTermNode("s",
+ errorNode(),
+ ),
+ },
+ // The grammar can contain the 'recover' directive.
+ {
+ specSrc: `
+#name test;
+
+seq
+ : seq elem
+ | elem
+ ;
+elem
+ : id id id semi_colon
+ | error semi_colon #recover
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+id
+ : "[A-Za-z_]+";
+`,
+ src: `a b c ; d e f ;`,
+ },
+ // The same label can be used between different alternatives.
+ {
+ specSrc: `
+#name test;
+
+s
+ : foo@x bar
+ | foo@x
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ src: `foo`,
+ },
+ }
+
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ cg, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ gram := NewGrammar(cg)
+ tb := NewDefaultSyntaxTreeBuilder()
+ var opt []ParserOption
+ switch {
+ case tt.ast != nil:
+ opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
+ case tt.cst != nil:
+ opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
+ }
+ p, err := NewParser(toks, gram, opt...)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if !tt.synErr && len(p.SyntaxErrors()) > 0 {
+ for _, synErr := range p.SyntaxErrors() {
+ t.Fatalf("unexpected syntax errors occurred: %v", synErr)
+ }
+ }
+
+ switch {
+ case tt.ast != nil:
+ testTree(t, tb.Tree(), tt.ast)
+ case tt.cst != nil:
+ testTree(t, tb.Tree(), tt.cst)
+ }
+ })
+ }
+}
+
+func testTree(t *testing.T, node, expected *Node) {
+ t.Helper()
+
+ if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
+ t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
+ }
+ if len(node.Children) != len(expected.Children) {
+ t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
+ }
+ for i, c := range node.Children {
+ testTree(t, c, expected.Children[i])
+ }
+}
+
+type testSemAct struct {
+ gram *spec.CompiledGrammar
+ actLog []string
+}
+
+func (a *testSemAct) Shift(tok VToken, recovered bool) {
+ t := a.gram.Syntactic.Terminals[tok.TerminalID()]
+ if recovered {
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
+ } else {
+ a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
+ }
+}
+
+func (a *testSemAct) Reduce(prodNum int, recovered bool) {
+ lhsSym := a.gram.Syntactic.LHSSymbols[prodNum]
+ lhsText := a.gram.Syntactic.NonTerminals[lhsSym]
+ if recovered {
+ a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText))
+ } else {
+ a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText))
+ }
+}
+
+func (a *testSemAct) Accept() {
+ a.actLog = append(a.actLog, "accept")
+}
+
+func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
+ a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
+}
+
+func (a *testSemAct) MissError(cause VToken) {
+ a.actLog = append(a.actLog, "miss")
+}
+
+func TestParserWithSemanticAction(t *testing.T) {
+ specSrcWithErrorProd := `
+#name test;
+
+seq
+ : seq elem semicolon
+ | elem semicolon
+ | error star star semicolon
+ | error semicolon #recover
+ ;
+elem
+ : char char char
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semicolon
+ : ';';
+star
+ : '*';
+char
+ : "[a-z]";
+`
+
+ specSrcWithoutErrorProd := `
+#name test;
+
+seq
+ : seq elem semicolon
+ | elem semicolon
+ ;
+elem
+ : char char char
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semicolon
+ : ';';
+char
+ : "[a-z]";
+`
+
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ actLog []string
+ }{
+ {
+ caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a b c; d e f;`,
+ actLog: []string{
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ "accept",
+ },
+ },
+ {
+ caption: "when a grammar has `error` symbol, the driver calls `TrapAndShiftError`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a; b !; c d !; e ! * *; h i j;`,
+ actLog: []string{
+ "shift/char",
+ "trap/1/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "trap/2/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "shift/char",
+ "trap/3/shift/error",
+ "shift/semicolon",
+ "reduce/seq/recovered",
+
+ "shift/char",
+ "trap/2/shift/error",
+ "shift/star",
+ "shift/star",
+ // When the driver shifts three times, it recovers from an error.
+ "shift/semicolon/recovered",
+ "reduce/seq",
+
+ "shift/char",
+ "shift/char",
+ "shift/char",
+ "reduce/elem",
+ "shift/semicolon",
+ "reduce/seq",
+
+ // Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted
+ // according to the error production.
+ "accept",
+ },
+ },
+ {
+ caption: "when the input doesn't meet the error production, the driver calls `MissError`.",
+ specSrc: specSrcWithErrorProd,
+ src: `a !`,
+ actLog: []string{
+ "shift/char",
+ "trap/1/shift/error",
+
+ "miss",
+ },
+ },
+ {
+ caption: "when a syntax error isn't trapped, the driver calls `MissError`.",
+ specSrc: specSrcWithoutErrorProd,
+ src: `a !`,
+ actLog: []string{
+ "shift/char",
+
+ "miss",
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ semAct := &testSemAct{
+ gram: gram,
+ }
+ p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ if len(semAct.actLog) != len(tt.actLog) {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
+ }
+
+ for i, e := range tt.actLog {
+ if semAct.actLog[i] != e {
+ t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
+ }
+ }
+ })
+ }
+}
+
+func TestParserWithSyntaxErrors(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ synErrCount int
+ }{
+ {
+ caption: "the parser can report a syntax error",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ synErrCount: 1,
+ },
+ {
+ caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state",
+ specSrc: `
+#name test;
+
+seq
+ : seq elem semi_colon
+ | elem semi_colon
+ | error semi_colon #recover
+ ;
+elem
+ : a b c
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+a
+ : 'a';
+b
+ : 'b';
+c
+ : 'c';
+`,
+ src: `!; a!; ab!;`,
+ synErrCount: 3,
+ },
+ {
+ caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears",
+ specSrc: `
+#name test;
+
+seq
+ : seq elem semi_colon
+ | elem semi_colon
+ | error semi_colon #recover
+ ;
+elem
+ : a b c
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+a
+ : 'a';
+b
+ : 'b';
+c
+ : 'c';
+`,
+ // After the parser transitions to the error state reading the first invalid symbol ('!'),
+ // the second and third invalid symbols ('!') are ignored.
+ src: `! ! !; a!; ab!;`,
+ synErrCount: 3,
+ },
+ {
+ caption: "when the parser performs shift three times, the parser recovers from the error state",
+ specSrc: `
+#name test;
+
+seq
+ : seq elem semi_colon
+ | elem semi_colon
+ | error star star semi_colon
+ ;
+elem
+ : a b c
+ ;
+
+ws #skip
+ : "[\u{0009}\u{0020}]+";
+semi_colon
+ : ';';
+star
+ : '*';
+a
+ : 'a';
+b
+ : 'b';
+c
+ : 'c';
+`,
+ src: `!**; a!**; ab!**; abc!`,
+ synErrCount: 4,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ synErrs := p.SyntaxErrors()
+ if len(synErrs) != tt.synErrCount {
+ t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
+ }
+ })
+ }
+}
+
+func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) {
+ tests := []struct {
+ caption string
+ specSrc string
+ src string
+ cause string
+ expected []string
+ }{
+ {
+ caption: "the parser reports an expected lookahead symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ cause: `bar`,
+ expected: []string{
+ "foo",
+ },
+ },
+ {
+ caption: "the parser reports expected lookahead symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ | bar
+ ;
+
+foo
+ : 'foo';
+bar
+ : 'bar';
+`,
+ src: `baz`,
+ cause: `baz`,
+ expected: []string{
+ "foo",
+ "bar",
+ },
+ },
+ {
+ caption: "the parser may report the EOF as an expected lookahead symbol",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `foobar`,
+ cause: `bar`,
+ expected: []string{
+ "<eof>",
+ },
+ },
+ {
+ caption: "the parser may report the EOF and others as expected lookahead symbols",
+ specSrc: `
+#name test;
+
+s
+ : foo
+ |
+ ;
+
+foo
+ : 'foo';
+`,
+ src: `bar`,
+ cause: `bar`,
+ expected: []string{
+ "foo",
+ "<eof>",
+ },
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ ast, err := parser.Parse(strings.NewReader(tt.specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := grammar.GrammarBuilder{
+ AST: ast,
+ }
+ gram, _, err := b.Build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p, err := NewParser(toks, NewGrammar(gram))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ synErrs := p.SyntaxErrors()
+ if synErrs == nil {
+ t.Fatalf("expected one syntax error, but it didn't occur")
+ }
+ if len(synErrs) != 1 {
+ t.Fatalf("too many syntax errors: %v errors", len(synErrs))
+ }
+ synErr := synErrs[0]
+ if string(synErr.Token.Lexeme()) != tt.cause {
+ t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme()))
+ }
+ if len(synErr.ExpectedTerminals) != len(tt.expected) {
+ t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals)
+ }
+ sort.Slice(tt.expected, func(i, j int) bool {
+ return tt.expected[i] < tt.expected[j]
+ })
+ sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool {
+ return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j]
+ })
+ for i, e := range tt.expected {
+ if synErr.ExpectedTerminals[i] != e {
+ t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i])
+ }
+ }
+ })
+ }
+}
diff --git a/tests/unit/driver/parser/conflict_test.go b/tests/unit/driver/parser/conflict_test.go
deleted file mode 100644
index 0bc14d4..0000000
--- a/tests/unit/driver/parser/conflict_test.go
+++ /dev/null
@@ -1,524 +0,0 @@
-package parser
-
-import (
- "strings"
- "testing"
-
- "urubu/grammar"
- "urubu/spec/grammar/parser"
-)
-
-func TestParserWithConflicts(t *testing.T) {
- tests := []struct {
- caption string
- specSrc string
- src string
- cst *Node
- }{
- {
- caption: "when a shift/reduce conflict occurred, we prioritize the shift action",
- specSrc: `
-#name test;
-
-expr
- : expr assign expr
- | id
- ;
-
-id: "[A-Za-z0-9_]+";
-assign: '=';
-`,
- src: `foo=bar=baz`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "foo"),
- ),
- termNode("assign", "="),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "bar"),
- ),
- termNode("assign", "="),
- nonTermNode("expr",
- termNode("id", "baz"),
- ),
- ),
- ),
- },
- {
- caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar",
- specSrc: `
-#name test;
-
-s
- : a
- | b
- ;
-a
- : id
- ;
-b
- : id
- ;
-
-id: "[A-Za-z0-9_]+";
-`,
- src: `foo`,
- cst: nonTermNode("s",
- nonTermNode("a",
- termNode("id", "foo"),
- ),
- ),
- },
- {
- caption: "left associativities defined earlier in the grammar have higher precedence",
- specSrc: `
-#name test;
-
-#prec (
- #left mul
- #left add
-);
-
-expr
- : expr add expr
- | expr mul expr
- | id
- ;
-
-id: "[A-Za-z0-9_]+";
-add: '+';
-mul: '*';
-`,
- src: `a+b*c*d+e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- termNode("mul", "*"),
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- ),
- termNode("mul", "*"),
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- },
- {
- caption: "left associativities defined in the same line have the same precedence",
- specSrc: `
-#name test;
-
-#prec (
- #left add sub
-);
-
-expr
- : expr add expr
- | expr sub expr
- | id
- ;
-
-id: "[A-Za-z0-9_]+";
-add: '+';
-sub: '-';
-`,
- src: `a-b+c+d-e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("sub", "-"),
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- ),
- termNode("sub", "-"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- },
- {
- caption: "right associativities defined earlier in the grammar have higher precedence",
- specSrc: `
-#name test;
-
-#prec (
- #right r1
- #right r2
-);
-
-expr
- : expr r2 expr
- | expr r1 expr
- | id
- ;
-
-whitespaces #skip
- : "[\u{0009}\u{0020}]+";
-r1
- : 'r1';
-r2
- : 'r2';
-id
- : "[A-Za-z0-9_]+";
-`,
- src: `a r2 b r1 c r1 d r2 e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("r2", "r2"),
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- termNode("r1", "r1"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- termNode("r1", "r1"),
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- ),
- ),
- termNode("r2", "r2"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- ),
- },
- {
- caption: "right associativities defined in the same line have the same precedence",
- specSrc: `
-#name test;
-
-#prec (
- #right r1 r2
-);
-
-expr
- : expr r2 expr
- | expr r1 expr
- | id
- ;
-
-whitespaces #skip
- : "[\u{0009}\u{0020}]+";
-r1
- : 'r1';
-r2
- : 'r2';
-id
- : "[A-Za-z0-9_]+";
-`,
- src: `a r2 b r1 c r1 d r2 e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("r2", "r2"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- termNode("r1", "r1"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- termNode("r1", "r1"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- termNode("r2", "r2"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- ),
- ),
- ),
- },
- {
- caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence",
- specSrc: `
-#name test;
-
-#prec (
- #assign a1
- #assign a2
-);
-
-expr
- : expr a2 expr
- | expr a1 expr
- | id
- ;
-
-whitespaces #skip
- : "[\u{0009}\u{0020}]+";
-a1
- : 'a1';
-a2
- : 'a2';
-id
- : "[A-Za-z0-9_]+";
-`,
- src: `a a2 b a1 c a1 d a2 e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("a2", "a2"),
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- termNode("a1", "a1"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- termNode("a1", "a1"),
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- ),
- ),
- termNode("a2", "a2"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- ),
- },
- {
- caption: "terminal symbols with an #assign directive defined in the same line have the same precedence",
- specSrc: `
-#name test;
-
-#prec (
- #assign a1 a2
-);
-
-expr
- : expr a2 expr
- | expr a1 expr
- | id
- ;
-
-whitespaces #skip
- : "[\u{0009}\u{0020}]+";
-a1
- : 'a1';
-a2
- : 'a2';
-id
- : "[A-Za-z0-9_]+";
-`,
- src: `a a2 b a1 c a1 d a2 e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("a2", "a2"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- termNode("a1", "a1"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- termNode("a1", "a1"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- termNode("a2", "a2"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- ),
- ),
- ),
- },
- {
- caption: "#left, #right, and #assign can be mixed",
- specSrc: `
-#name test;
-
-#prec (
- #left mul div
- #left add sub
- #assign else
- #assign then
- #right assign
-);
-
-expr
- : expr add expr
- | expr sub expr
- | expr mul expr
- | expr div expr
- | expr assign expr
- | if expr then expr
- | if expr then expr else expr
- | id
- ;
-
-ws #skip: "[\u{0009}\u{0020}]+";
-if: 'if';
-then: 'then';
-else: 'else';
-id: "[A-Za-z0-9_]+";
-add: '+';
-sub: '-';
-mul: '*';
-div: '/';
-assign: '=';
-`,
- src: `x = y = a + b * c - d / e + if f then if g then h else i`,
- cst: nonTermNode(
- "expr",
- nonTermNode("expr",
- termNode("id", "x"),
- ),
- termNode("assign", "="),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "y"),
- ),
- termNode("assign", "="),
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "a"),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "b"),
- ),
- termNode("mul", "*"),
- nonTermNode("expr",
- termNode("id", "c"),
- ),
- ),
- ),
- termNode("sub", "-"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("id", "d"),
- ),
- termNode("div", "/"),
- nonTermNode("expr",
- termNode("id", "e"),
- ),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("if", "if"),
- nonTermNode("expr",
- termNode("id", "f"),
- ),
- termNode("then", "then"),
- nonTermNode("expr",
- termNode("if", "if"),
- nonTermNode("expr",
- termNode("id", "g"),
- ),
- termNode("then", "then"),
- nonTermNode("expr",
- termNode("id", "h"),
- ),
- termNode("else", "else"),
- nonTermNode("expr",
- termNode("id", "i"),
- ),
- ),
- ),
- ),
- ),
- ),
- },
- }
-
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(tt.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := grammar.GrammarBuilder{
- AST: ast,
- }
- cg, _, err := b.Build()
- if err != nil {
- t.Fatal(err)
- }
-
- toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
- if err != nil {
- t.Fatal(err)
- }
-
- gram := NewGrammar(cg)
- tb := NewDefaultSyntaxTreeBuilder()
- p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- if tt.cst != nil {
- testTree(t, tb.Tree(), tt.cst)
- }
- })
- }
-}
diff --git a/tests/unit/driver/parser/lac_test.go b/tests/unit/driver/parser/lac_test.go
deleted file mode 100644
index c2368e8..0000000
--- a/tests/unit/driver/parser/lac_test.go
+++ /dev/null
@@ -1,120 +0,0 @@
-package parser
-
-import (
- "strings"
- "testing"
-
- "urubu/grammar"
- "urubu/spec/grammar/parser"
-)
-
-func TestParserWithLAC(t *testing.T) {
- specSrc := `
-#name test;
-
-s
- : t t
- ;
-t
- : c t
- | d
- ;
-
-c: 'c';
-d: 'd';
-`
-
- src := `ccd`
-
- actLogWithLAC := []string{
- "shift/c",
- "shift/c",
- "shift/d",
- "miss",
- }
-
- actLogWithoutLAC := []string{
- "shift/c",
- "shift/c",
- "shift/d",
- "reduce/t",
- "reduce/t",
- "reduce/t",
- "miss",
- }
-
- ast, err := parser.Parse(strings.NewReader(specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := grammar.GrammarBuilder{
- AST: ast,
- }
- gram, _, err := b.Build()
- if err != nil {
- t.Fatal(err)
- }
-
- t.Run("LAC is enabled", func(t *testing.T) {
- semAct := &testSemAct{
- gram: gram,
- }
-
- toks, err := NewTokenStream(gram, strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
-
- p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- if len(semAct.actLog) != len(actLogWithLAC) {
- t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
- }
-
- for i, e := range actLogWithLAC {
- if semAct.actLog[i] != e {
- t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
- }
- }
- })
-
- t.Run("LAC is disabled", func(t *testing.T) {
- semAct := &testSemAct{
- gram: gram,
- }
-
- toks, err := NewTokenStream(gram, strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
-
- p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- if len(semAct.actLog) != len(actLogWithoutLAC) {
- t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
- }
-
- for i, e := range actLogWithoutLAC {
- if semAct.actLog[i] != e {
- t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
- }
- }
- })
-}
diff --git a/tests/unit/driver/parser/parser_test.go b/tests/unit/driver/parser/parser_test.go
deleted file mode 100644
index bca0391..0000000
--- a/tests/unit/driver/parser/parser_test.go
+++ /dev/null
@@ -1,833 +0,0 @@
-package parser
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "urubu/grammar"
- "urubu/spec/grammar/parser"
-)
-
-func termNode(kind string, text string, children ...*Node) *Node {
- return &Node{
- Type: NodeTypeTerminal,
- KindName: kind,
- Text: text,
- Children: children,
- }
-}
-
-func errorNode() *Node {
- return &Node{
- Type: NodeTypeError,
- KindName: "error",
- }
-}
-
-func nonTermNode(kind string, children ...*Node) *Node {
- return &Node{
- Type: NodeTypeNonTerminal,
- KindName: kind,
- Children: children,
- }
-}
-
-func TestParser_Parse(t *testing.T) {
- tests := []struct {
- specSrc string
- src string
- synErr bool
- cst *Node
- ast *Node
- }{
- {
- specSrc: `
-#name test;
-
-expr
- : expr add term
- | term
- ;
-term
- : term mul factor
- | factor
- ;
-factor
- : l_paren expr r_paren
- | id
- ;
-
-add
- : '+';
-mul
- : '*';
-l_paren
- : '(';
-r_paren
- : ')';
-id
- : "[A-Za-z_][0-9A-Za-z_]*";
-`,
- src: `(a+(b+c))*d+e`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("term",
- nonTermNode("term",
- nonTermNode("factor",
- termNode("l_paren", "("),
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("term",
- nonTermNode("factor",
- termNode("id", "a"),
- ),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("term",
- nonTermNode("factor",
- termNode("l_paren", "("),
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("term",
- nonTermNode("factor",
- termNode("id", "b"),
- ),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("term",
- nonTermNode("factor",
- termNode("id", "c"),
- ),
- ),
- ),
- termNode("r_paren", ")"),
- ),
- ),
- ),
- termNode("r_paren", ")"),
- ),
- ),
- termNode("mul", "*"),
- nonTermNode("factor",
- termNode("id", "d"),
- ),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("term",
- nonTermNode("factor",
- termNode("id", "e"),
- ),
- ),
- ),
- },
- // Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
- // not allowed in string literals.
- {
- specSrc: `
-#name test;
-
-s
- : a b c
- ;
-
-a
- : '\f{foo}';
-b
- : '\u{0000}';
-c
- : '\p{gc=Letter}';
-`,
- src: `\f{foo}\u{0000}\p{gc=Letter}`,
- cst: nonTermNode("s",
- termNode("a", `\f{foo}`),
- termNode("b", `\u{0000}`),
- termNode("c", `\p{gc=Letter}`),
- ),
- },
- // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
- {
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-foo
- :
- ;
-bar
- : bar_text
- |
- ;
-bar_text: "bar";
-`,
- src: ``,
- cst: nonTermNode("s",
- nonTermNode("foo"),
- nonTermNode("bar"),
- ),
- },
- // The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
- {
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-foo
- :
- ;
-bar
- : bar_text
- |
- ;
-
-bar_text
- : "bar";
-`,
- src: `bar`,
- cst: nonTermNode("s",
- nonTermNode("foo"),
- nonTermNode("bar",
- termNode("bar_text", "bar"),
- ),
- ),
- },
- // A production can have multiple alternative productions.
- {
- specSrc: `
-#name test;
-
-#prec (
- #assign $uminus
- #left mul div
- #left add sub
-);
-
-expr
- : expr add expr
- | expr sub expr
- | expr mul expr
- | expr div expr
- | int
- | sub int #prec $uminus // This 'sub' means the unary minus symbol.
- ;
-
-int
- : "0|[1-9][0-9]*";
-add
- : '+';
-sub
- : '-';
-mul
- : '*';
-div
- : '/';
-`,
- src: `-1*-2+3-4/5`,
- ast: nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("sub", "-"),
- termNode("int", "1"),
- ),
- termNode("mul", "*"),
- nonTermNode("expr",
- termNode("sub", "-"),
- termNode("int", "2"),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("int", "3"),
- ),
- ),
- termNode("sub", "-"),
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("int", "4"),
- ),
- termNode("div", "/"),
- nonTermNode("expr",
- termNode("int", "5"),
- ),
- ),
- ),
- },
- // A lexical production can have multiple production directives.
- {
- specSrc: `
-#name test;
-
-s
- : push_a push_b pop pop
- ;
-
-push_a #mode default #push a
- : '->a';
-push_b #mode a #push b
- : '->b';
-pop #mode a b #pop
- : '<-';
-`,
- src: `->a->b<-<-`,
- ast: nonTermNode("s",
- termNode("push_a", "->a"),
- termNode("push_b", "->b"),
- termNode("pop", "<-"),
- termNode("pop", "<-"),
- ),
- },
- {
- specSrc: `
-#name test;
-
-mode_tran_seq
- : mode_tran_seq mode_tran
- | mode_tran
- ;
-mode_tran
- : push_m1
- | push_m2
- | pop_m1
- | pop_m2
- ;
-
-push_m1 #push m1
- : "->";
-push_m2 #mode m1 #push m2
- : "-->";
-pop_m1 #mode m1 #pop
- : "<-";
-pop_m2 #mode m2 #pop
- : "<--";
-whitespace #mode default m1 m2 #skip
- : "\u{0020}+";
-`,
- src: ` -> --> <-- <- `,
- },
- {
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo
- : "foo";
-bar #mode default
- : "bar";
-`,
- src: `foobar`,
- },
- // When #push and #pop are applied to the same symbol, #pop will run first, then #push.
- {
- specSrc: `
-#name test;
-
-s
- : foo bar baz
- ;
-
-foo #push m1
- : 'foo';
-bar #mode m1 #pop #push m2
- : 'bar';
-baz #mode m2
- : 'baz';
-`,
- src: `foobarbaz`,
- ast: nonTermNode("s",
- termNode("foo", "foo"),
- termNode("bar", "bar"),
- termNode("baz", "baz"),
- ),
- },
- // When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first
- // in a definition. That is, the order in which #push and #pop appear in grammar has nothing to do with the order in which
- // they are executed.
- {
- specSrc: `
-#name test;
-
-s
- : foo bar baz
- ;
-
-foo #push m1
- : 'foo';
-bar #mode m1 #push m2 #pop
- : 'bar';
-baz #mode m2
- : 'baz';
-`,
- src: `foobarbaz`,
- ast: nonTermNode("s",
- termNode("foo", "foo"),
- termNode("bar", "bar"),
- termNode("baz", "baz"),
- ),
- },
- // The parser can skips specified tokens.
- {
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-white_space #skip
- : "[\u{0009}\u{0020}]+";
-`,
- src: `foo bar`,
- },
- // A grammar can contain fragments.
- {
- specSrc: `
-#name test;
-
-s
- : tagline
- ;
-tagline
- : "\f{words} IS OUT THERE.";
-fragment words
- : "[A-Za-z\u{0020}]+";
-`,
- src: `THE TRUTH IS OUT THERE.`,
- },
- // A grammar can contain ast actions.
- {
- specSrc: `
-#name test;
-
-list
- : l_bracket elems r_bracket #ast elems...
- ;
-elems
- : elems comma id #ast elems... id
- | id
- ;
-
-whitespace #skip
- : "\u{0020}+";
-l_bracket
- : '[';
-r_bracket
- : ']';
-comma
- : ',';
-id
- : "[A-Za-z]+";
-`,
- src: `[Byers, Frohike, Langly]`,
- cst: nonTermNode("list",
- termNode("x_1", "["),
- nonTermNode("elems",
- nonTermNode("elems",
- nonTermNode("elems",
- termNode("id", "Byers"),
- ),
- termNode("x_3", ","),
- termNode("id", "Frohike"),
- ),
- termNode("x_3", ","),
- termNode("id", "Langly"),
- ),
- termNode("x_2", "]"),
- ),
- ast: nonTermNode("list",
- termNode("id", "Byers"),
- termNode("id", "Frohike"),
- termNode("id", "Langly"),
- ),
- },
- // The '...' operator can expand child nodes.
- {
- specSrc: `
-#name test;
-
-s
- : a #ast a...
- ;
-a
- : a comma foo #ast a... foo
- | foo
- ;
-
-comma
- : ',';
-foo
- : 'foo';
-`,
- src: `foo,foo,foo`,
- ast: nonTermNode("s",
- termNode("foo", "foo"),
- termNode("foo", "foo"),
- termNode("foo", "foo"),
- ),
- },
- // The '...' operator also can applied to an element having no children.
- {
- specSrc: `
-#name test;
-
-s
- : a semi_colon #ast a...
- ;
-a
- :
- ;
-
-semi_colon
- : ';';
-`,
- src: `;`,
- ast: nonTermNode("s"),
- },
- // A label can be a parameter of #ast directive.
- {
- specSrc: `
-#name test;
-
-#prec (
- #left add sub
-);
-
-expr
- : expr@lhs add expr@rhs #ast add lhs rhs
- | expr@lhs sub expr@rhs #ast sub lhs rhs
- | num
- ;
-
-add
- : '+';
-sub
- : '-';
-num
- : "0|[1-9][0-9]*";
-`,
- src: `1+2-3`,
- ast: nonTermNode("expr",
- termNode("sub", "-"),
- nonTermNode("expr",
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("num", "1"),
- ),
- nonTermNode("expr",
- termNode("num", "2"),
- ),
- ),
- nonTermNode("expr",
- termNode("num", "3"),
- ),
- ),
- },
- // An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed.
- {
- specSrc: `
-#name test;
-
-s
- : foo@x semi_colon #ast foo
- ;
-
-semi_colon
- : ';';
-foo
- : 'foo';
-`,
- src: `foo;`,
- ast: nonTermNode("s",
- termNode("foo", "foo"),
- ),
- },
- // A production has the same precedence and associativity as the right-most terminal symbol.
- {
- specSrc: `
-#name test;
-
-#prec (
- #left add
-);
-
-expr
- : expr add expr // This alternative has the same precedence and associativiry as 'add'.
- | int
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-int
- : "0|[1-9][0-9]*";
-add
- : '+';
-`,
- // This source is recognized as the following structure because the production `expr → expr add expr` has the same
- // precedence and associativity as the symbol 'add'.
- //
- // ((1+2)+3)
- //
- // If the symbol doesn't have the precedence and left associativity, the production also doesn't have the precedence
- // and associativity and this source will be recognized as the following structure.
- //
- // (1+(2+3))
- src: `1+2+3`,
- ast: nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("int", "1"),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("int", "2"),
- ),
- ),
- termNode("add", "+"),
- nonTermNode("expr",
- termNode("int", "3"),
- ),
- ),
- },
- // The 'prec' directive can set precedence of a production.
- {
- specSrc: `
-#name test;
-
-#prec (
- #assign $uminus
- #left mul div
- #left add sub
-);
-
-expr
- : expr add expr
- | expr sub expr
- | expr mul expr
- | expr div expr
- | int
- | sub int #prec $uminus // This 'sub' means a unary minus symbol.
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-int
- : "0|[1-9][0-9]*";
-add
- : '+';
-sub
- : '-';
-mul
- : '*';
-div
- : '/';
-`,
- // This source is recognized as the following structure because the production `expr → sub expr`
- // has the `#prec mul` directive and has the same precedence of the symbol `mul`.
- //
- // (((-1) * 20) / 5)
- //
- // If the production doesn't have the `#prec` directive, this source will be recognized as
- // the following structure.
- //
- // (- ((1 * 20) / 5))
- src: `-1*20/5`,
- cst: nonTermNode("expr",
- nonTermNode("expr",
- nonTermNode("expr",
- termNode("sub", "-"),
- termNode("int", "1"),
- ),
- termNode("mul", "*"),
- nonTermNode("expr",
- termNode("int", "20"),
- ),
- ),
- termNode("div", "/"),
- nonTermNode("expr",
- termNode("int", "5"),
- ),
- ),
- },
- // The grammar can contain the 'error' symbol.
- {
- specSrc: `
-#name test;
-
-s
- : id id id semi_colon
- | error semi_colon
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semi_colon
- : ';';
-id
- : "[A-Za-z_]+";
-`,
- src: `foo bar baz ;`,
- },
- // The 'error' symbol can appear in an #ast directive.
- {
- specSrc: `
-#name test;
-
-s
- : foo semi_colon
- | error semi_colon #ast error
- ;
-
-semi_colon
- : ';';
-foo
- : 'foo';
-`,
- src: `bar;`,
- synErr: true,
- ast: nonTermNode("s",
- errorNode(),
- ),
- },
- // The 'error' symbol can have a label, and an #ast can reference it.
- {
- specSrc: `
-#name test;
-
-s
- : foo semi_colon
- | error@e semi_colon #ast e
- ;
-
-semi_colon
- : ';';
-foo
- : 'foo';
-`,
- src: `bar;`,
- synErr: true,
- ast: nonTermNode("s",
- errorNode(),
- ),
- },
- // The grammar can contain the 'recover' directive.
- {
- specSrc: `
-#name test;
-
-seq
- : seq elem
- | elem
- ;
-elem
- : id id id semi_colon
- | error semi_colon #recover
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semi_colon
- : ';';
-id
- : "[A-Za-z_]+";
-`,
- src: `a b c ; d e f ;`,
- },
- // The same label can be used between different alternatives.
- {
- specSrc: `
-#name test;
-
-s
- : foo@x bar
- | foo@x
- ;
-
-foo: 'foo';
-bar: 'bar';
-`,
- src: `foo`,
- },
- }
-
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(tt.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := grammar.GrammarBuilder{
- AST: ast,
- }
- cg, _, err := b.Build()
- if err != nil {
- t.Fatal(err)
- }
-
- toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
- if err != nil {
- t.Fatal(err)
- }
-
- gram := NewGrammar(cg)
- tb := NewDefaultSyntaxTreeBuilder()
- var opt []ParserOption
- switch {
- case tt.ast != nil:
- opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
- case tt.cst != nil:
- opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
- }
- p, err := NewParser(toks, gram, opt...)
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- if !tt.synErr && len(p.SyntaxErrors()) > 0 {
- for _, synErr := range p.SyntaxErrors() {
- t.Fatalf("unexpected syntax errors occurred: %v", synErr)
- }
- }
-
- switch {
- case tt.ast != nil:
- testTree(t, tb.Tree(), tt.ast)
- case tt.cst != nil:
- testTree(t, tb.Tree(), tt.cst)
- }
- })
- }
-}
-
-func testTree(t *testing.T, node, expected *Node) {
- t.Helper()
-
- if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
- t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
- }
- if len(node.Children) != len(expected.Children) {
- t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
- }
- for i, c := range node.Children {
- testTree(t, c, expected.Children[i])
- }
-}
diff --git a/tests/unit/driver/parser/semantic_action_test.go b/tests/unit/driver/parser/semantic_action_test.go
deleted file mode 100644
index cb3ee70..0000000
--- a/tests/unit/driver/parser/semantic_action_test.go
+++ /dev/null
@@ -1,227 +0,0 @@
-package parser
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "urubu/grammar"
- spec "urubu/spec/grammar"
- "urubu/spec/grammar/parser"
-)
-
-type testSemAct struct {
- gram *spec.CompiledGrammar
- actLog []string
-}
-
-func (a *testSemAct) Shift(tok VToken, recovered bool) {
- t := a.gram.Syntactic.Terminals[tok.TerminalID()]
- if recovered {
- a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
- } else {
- a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
- }
-}
-
-func (a *testSemAct) Reduce(prodNum int, recovered bool) {
- lhsSym := a.gram.Syntactic.LHSSymbols[prodNum]
- lhsText := a.gram.Syntactic.NonTerminals[lhsSym]
- if recovered {
- a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText))
- } else {
- a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText))
- }
-}
-
-func (a *testSemAct) Accept() {
- a.actLog = append(a.actLog, "accept")
-}
-
-func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
- a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
-}
-
-func (a *testSemAct) MissError(cause VToken) {
- a.actLog = append(a.actLog, "miss")
-}
-
-func TestParserWithSemanticAction(t *testing.T) {
- specSrcWithErrorProd := `
-#name test;
-
-seq
- : seq elem semicolon
- | elem semicolon
- | error star star semicolon
- | error semicolon #recover
- ;
-elem
- : char char char
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semicolon
- : ';';
-star
- : '*';
-char
- : "[a-z]";
-`
-
- specSrcWithoutErrorProd := `
-#name test;
-
-seq
- : seq elem semicolon
- | elem semicolon
- ;
-elem
- : char char char
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semicolon
- : ';';
-char
- : "[a-z]";
-`
-
- tests := []struct {
- caption string
- specSrc string
- src string
- actLog []string
- }{
- {
- caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.",
- specSrc: specSrcWithErrorProd,
- src: `a b c; d e f;`,
- actLog: []string{
- "shift/char",
- "shift/char",
- "shift/char",
- "reduce/elem",
- "shift/semicolon",
- "reduce/seq",
-
- "shift/char",
- "shift/char",
- "shift/char",
- "reduce/elem",
- "shift/semicolon",
- "reduce/seq",
-
- "accept",
- },
- },
- {
- caption: "when a grammar has `error` symbol, the driver calls `TrapAndShiftError`.",
- specSrc: specSrcWithErrorProd,
- src: `a; b !; c d !; e ! * *; h i j;`,
- actLog: []string{
- "shift/char",
- "trap/1/shift/error",
- "shift/semicolon",
- "reduce/seq/recovered",
-
- "shift/char",
- "trap/2/shift/error",
- "shift/semicolon",
- "reduce/seq/recovered",
-
- "shift/char",
- "shift/char",
- "trap/3/shift/error",
- "shift/semicolon",
- "reduce/seq/recovered",
-
- "shift/char",
- "trap/2/shift/error",
- "shift/star",
- "shift/star",
- // When the driver shifts three times, it recovers from an error.
- "shift/semicolon/recovered",
- "reduce/seq",
-
- "shift/char",
- "shift/char",
- "shift/char",
- "reduce/elem",
- "shift/semicolon",
- "reduce/seq",
-
- // Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted
- // according to the error production.
- "accept",
- },
- },
- {
- caption: "when the input doesn't meet the error production, the driver calls `MissError`.",
- specSrc: specSrcWithErrorProd,
- src: `a !`,
- actLog: []string{
- "shift/char",
- "trap/1/shift/error",
-
- "miss",
- },
- },
- {
- caption: "when a syntax error isn't trapped, the driver calls `MissError`.",
- specSrc: specSrcWithoutErrorProd,
- src: `a !`,
- actLog: []string{
- "shift/char",
-
- "miss",
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(tt.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := grammar.GrammarBuilder{
- AST: ast,
- }
- gram, _, err := b.Build()
- if err != nil {
- t.Fatal(err)
- }
-
- toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
- if err != nil {
- t.Fatal(err)
- }
-
- semAct := &testSemAct{
- gram: gram,
- }
- p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- if len(semAct.actLog) != len(tt.actLog) {
- t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
- }
-
- for i, e := range tt.actLog {
- if semAct.actLog[i] != e {
- t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
- }
- }
- })
- }
-}
diff --git a/tests/unit/driver/parser/syntax_error_test.go b/tests/unit/driver/parser/syntax_error_test.go
deleted file mode 100644
index 90e5bd2..0000000
--- a/tests/unit/driver/parser/syntax_error_test.go
+++ /dev/null
@@ -1,306 +0,0 @@
-package parser
-
-import (
- "fmt"
- "sort"
- "strings"
- "testing"
-
- "urubu/grammar"
- "urubu/spec/grammar/parser"
-)
-
-func TestParserWithSyntaxErrors(t *testing.T) {
- tests := []struct {
- caption string
- specSrc string
- src string
- synErrCount int
- }{
- {
- caption: "the parser can report a syntax error",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- src: `bar`,
- synErrCount: 1,
- },
- {
- caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state",
- specSrc: `
-#name test;
-
-seq
- : seq elem semi_colon
- | elem semi_colon
- | error semi_colon #recover
- ;
-elem
- : a b c
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semi_colon
- : ';';
-a
- : 'a';
-b
- : 'b';
-c
- : 'c';
-`,
- src: `!; a!; ab!;`,
- synErrCount: 3,
- },
- {
- caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears",
- specSrc: `
-#name test;
-
-seq
- : seq elem semi_colon
- | elem semi_colon
- | error semi_colon #recover
- ;
-elem
- : a b c
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semi_colon
- : ';';
-a
- : 'a';
-b
- : 'b';
-c
- : 'c';
-`,
- // After the parser trasits to the error state reading the first invalid symbol ('!'),
- // the second and third invalid symbols ('!') are ignored.
- src: `! ! !; a!; ab!;`,
- synErrCount: 3,
- },
- {
- caption: "when the parser performs shift three times, the parser recovers from the error state",
- specSrc: `
-#name test;
-
-seq
- : seq elem semi_colon
- | elem semi_colon
- | error star star semi_colon
- ;
-elem
- : a b c
- ;
-
-ws #skip
- : "[\u{0009}\u{0020}]+";
-semi_colon
- : ';';
-star
- : '*';
-a
- : 'a';
-b
- : 'b';
-c
- : 'c';
-`,
- src: `!**; a!**; ab!**; abc!`,
- synErrCount: 4,
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(tt.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := grammar.GrammarBuilder{
- AST: ast,
- }
- gram, _, err := b.Build()
- if err != nil {
- t.Fatal(err)
- }
-
- toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
- if err != nil {
- t.Fatal(err)
- }
-
- p, err := NewParser(toks, NewGrammar(gram))
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- synErrs := p.SyntaxErrors()
- if len(synErrs) != tt.synErrCount {
- t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
- }
- })
- }
-}
-
-func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) {
- tests := []struct {
- caption string
- specSrc string
- src string
- cause string
- expected []string
- }{
- {
- caption: "the parser reports an expected lookahead symbol",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- src: `bar`,
- cause: `bar`,
- expected: []string{
- "foo",
- },
- },
- {
- caption: "the parser reports expected lookahead symbols",
- specSrc: `
-#name test;
-
-s
- : foo
- | bar
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- src: `baz`,
- cause: `baz`,
- expected: []string{
- "foo",
- "bar",
- },
- },
- {
- caption: "the parser may report the EOF as an expected lookahead symbol",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- src: `foobar`,
- cause: `bar`,
- expected: []string{
- "<eof>",
- },
- },
- {
- caption: "the parser may report the EOF and others as expected lookahead symbols",
- specSrc: `
-#name test;
-
-s
- : foo
- |
- ;
-
-foo
- : 'foo';
-`,
- src: `bar`,
- cause: `bar`,
- expected: []string{
- "foo",
- "<eof>",
- },
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(tt.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := grammar.GrammarBuilder{
- AST: ast,
- }
- gram, _, err := b.Build()
- if err != nil {
- t.Fatal(err)
- }
-
- toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
- if err != nil {
- t.Fatal(err)
- }
-
- p, err := NewParser(toks, NewGrammar(gram))
- if err != nil {
- t.Fatal(err)
- }
-
- err = p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- synErrs := p.SyntaxErrors()
- if synErrs == nil {
- t.Fatalf("expected one syntax error, but it didn't occur")
- }
- if len(synErrs) != 1 {
- t.Fatalf("too many syntax errors: %v errors", len(synErrs))
- }
- synErr := synErrs[0]
- if string(synErr.Token.Lexeme()) != tt.cause {
- t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme()))
- }
- if len(synErr.ExpectedTerminals) != len(tt.expected) {
- t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals)
- }
- sort.Slice(tt.expected, func(i, j int) bool {
- return tt.expected[i] < tt.expected[j]
- })
- sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool {
- return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j]
- })
- for i, e := range tt.expected {
- if synErr.ExpectedTerminals[i] != e {
- t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i])
- }
- }
- })
- }
-}
diff --git a/tests/unit/grammar/grammar_test.go b/tests/unit/grammar.go
index ddedb27..3743b23 100644
--- a/tests/unit/grammar/grammar_test.go
+++ b/tests/unit/grammar.go
@@ -1,13 +1,225 @@
package grammar
import (
+ "fmt"
"strings"
"testing"
verr "urubu/error"
+ "urubu/grammar/symbol"
"urubu/spec/grammar/parser"
)
+type first struct {
+ lhs string
+ num int
+ dot int
+ symbols []string
+ empty bool
+}
+
+func TestGenFirst(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ first []first
+ }{
+ {
+ caption: "productions contain only non-empty productions",
+ src: `
+#name test;
+
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`,
+ first: []first{
+ {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}},
+ {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+ {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}},
+ {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
+ {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
+ {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}},
+ {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}},
+ {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}},
+ {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}},
+ },
+ },
+ {
+ caption: "productions contain the empty start production",
+ src: `
+#name test;
+
+s
+ :
+ ;
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "productions contain an empty production",
+ src: `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar: "bar";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
+ {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "a start production contains a non-empty alternative and empty alternative",
+ src: `
+#name test;
+
+s
+ : foo
+ |
+ ;
+foo: "foo";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}},
+ {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ {
+ caption: "a production contains non-empty alternative and empty alternative",
+ src: `
+#name test;
+
+s
+ : foo
+ ;
+foo
+ : bar
+ |
+ ;
+bar: "bar";
+`,
+ first: []first{
+ {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+ {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
+ {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}},
+ {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ fst, gram := genActualFirst(t, tt.src)
+
+ for _, ttFirst := range tt.first {
+ lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
+ }
+
+ prod, ok := gram.productionSet.findByLHS(lhsSym)
+ if !ok {
+ t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
+ }
+
+ actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
+ if err != nil {
+ t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
+ }
+
+ expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
+
+ testFirst(t, actualFirst, expectedFirst)
+ }
+ })
+ }
+}
+
+func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err := b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+ fst, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if fst == nil {
+		t.Fatal("genFirstSet returned nil without any error")
+ }
+
+ return fst, gram
+}
+
+func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
+ t.Helper()
+
+ entry := newFirstEntry()
+ if empty {
+ entry.addEmpty()
+ }
+ for _, sym := range symbols {
+ symSym, ok := symTab.ToSymbol(sym)
+ if !ok {
+ t.Fatalf("a symbol was not found; symbol: %v", sym)
+ }
+ entry.add(symSym)
+ }
+
+ return entry
+}
+
+func testFirst(t *testing.T, actual, expected *firstEntry) {
+ if actual.empty != expected.empty {
+ t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
+ }
+
+ if len(actual.symbols) != len(expected.symbols) {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+
+ for eSym := range expected.symbols {
+ if _, ok := actual.symbols[eSym]; !ok {
+ t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+ }
+ }
+}
+
func TestGrammarBuilderOK(t *testing.T) {
type okTest struct {
caption string
@@ -3379,3 +3591,1057 @@ bar
})
}
}
+
+func TestGenLALR1Automaton(t *testing.T) {
+ // This grammar belongs to LALR(1) class, not SLR(1).
+ src := `
+#name test;
+
+s: l eq r | r;
+l: ref r | id;
+r: l;
+eq: '=';
+ref: '*';
+id: "[A-Za-z0-9_]+";
+`
+
+ var gram *Grammar
+ var automaton *lalr1Automaton
+ {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err = b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+
+ firstSet, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatalf("failed to create a FIRST set: %v", err)
+ }
+
+ automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet)
+ if err != nil {
+ t.Fatalf("failed to create a LALR1 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genLALR1Automaton returns nil without any error")
+ }
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+		t.Errorf("failed to get an initial state: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
+ },
+ 1: {
+ withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
+ },
+ 2: {
+ withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
+ },
+ 3: {
+ withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
+ },
+ 4: {
+ withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 5: {
+ withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 6: {
+ withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ 7: {
+ withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 8: {
+ withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 9: {
+ withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ }
+
+ expectedStates := []*expectedLRState{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("s"): expectedKernels[1],
+ genSym("l"): expectedKernels[2],
+ genSym("r"): expectedKernels[3],
+ genSym("ref"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s'", "s"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("eq"): expectedKernels[6],
+ },
+ reducibleProds: []*production{
+ genProd("r", "l"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s", "r"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("r"): expectedKernels[7],
+ genSym("l"): expectedKernels[8],
+ genSym("ref"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[5],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("l", "id"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("r"): expectedKernels[9],
+ genSym("l"): expectedKernels[8],
+ genSym("ref"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[7],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("l", "ref", "r"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[8],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("r", "l"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[9],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s", "l", "eq", "r"),
+ },
+ },
+ }
+
+ testLRAutomaton(t, expectedStates, automaton.lr0Automaton)
+}
+
+type expectedLRState struct {
+ kernelItems []*lrItem
+ nextStates map[symbol.Symbol][]*lrItem
+ reducibleProds []*production
+ emptyProdItems []*lrItem
+}
+
+func TestGenLR0Automaton(t *testing.T) {
+ src := `
+#name test;
+
+expr
+ : expr add term
+ | term
+ ;
+term
+ : term mul factor
+ | factor
+ ;
+factor
+ : l_paren expr r_paren
+ | id
+ ;
+add: "\+";
+mul: "\*";
+l_paren: "\(";
+r_paren: "\)";
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`
+
+ var gram *Grammar
+ var automaton *lr0Automaton
+ {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err = b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genLR0Automaton returns nil without any error")
+ }
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+		t.Errorf("failed to get an initial state: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ genLR0Item("expr'", 0, "expr"),
+ },
+ 1: {
+ genLR0Item("expr'", 1, "expr"),
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ },
+ 2: {
+ genLR0Item("expr", 1, "term"),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 3: {
+ genLR0Item("term", 1, "factor"),
+ },
+ 4: {
+ genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
+ },
+ 5: {
+ genLR0Item("factor", 1, "id"),
+ },
+ 6: {
+ genLR0Item("expr", 2, "expr", "add", "term"),
+ },
+ 7: {
+ genLR0Item("term", 2, "term", "mul", "factor"),
+ },
+ 8: {
+ genLR0Item("expr", 1, "expr", "add", "term"),
+ genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
+ },
+ 9: {
+ genLR0Item("expr", 3, "expr", "add", "term"),
+ genLR0Item("term", 1, "term", "mul", "factor"),
+ },
+ 10: {
+ genLR0Item("term", 3, "term", "mul", "factor"),
+ },
+ 11: {
+ genLR0Item("factor", 3, "l_paren", "expr", "r_paren"),
+ },
+ }
+
+ expectedStates := []*expectedLRState{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("expr"): expectedKernels[1],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("add"): expectedKernels[6],
+ },
+ reducibleProds: []*production{
+ genProd("expr'", "expr"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("mul"): expectedKernels[7],
+ },
+ reducibleProds: []*production{
+ genProd("expr", "term"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("term", "factor"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("expr"): expectedKernels[8],
+ genSym("term"): expectedKernels[2],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[5],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("factor", "id"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("term"): expectedKernels[9],
+ genSym("factor"): expectedKernels[3],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[7],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("factor"): expectedKernels[10],
+ genSym("l_paren"): expectedKernels[4],
+ genSym("id"): expectedKernels[5],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[8],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("add"): expectedKernels[6],
+ genSym("r_paren"): expectedKernels[11],
+ },
+ reducibleProds: []*production{},
+ },
+ {
+ kernelItems: expectedKernels[9],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("mul"): expectedKernels[7],
+ },
+ reducibleProds: []*production{
+ genProd("expr", "expr", "add", "term"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[10],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("term", "term", "mul", "factor"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[11],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("factor", "l_paren", "expr", "r_paren"),
+ },
+ },
+ }
+
+ testLRAutomaton(t, expectedStates, automaton)
+}
+
+func TestLR0AutomatonContainingEmptyProduction(t *testing.T) {
+ src := `
+#name test;
+
+s
+ : foo bar
+ ;
+foo
+ :
+ ;
+bar
+ : b
+ |
+ ;
+
+b: "bar";
+`
+
+ var gram *Grammar
+ var automaton *lr0Automaton
+ {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err = b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 automaton: %v", err)
+ }
+ if automaton == nil {
+ t.Fatalf("genLR0Automaton returns nil without any error")
+ }
+ }
+
+ initialState := automaton.states[automaton.initialState]
+ if initialState == nil {
+		t.Errorf("failed to get an initial state: %v", automaton.initialState)
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ genLR0Item("s'", 0, "s"),
+ },
+ 1: {
+ genLR0Item("s'", 1, "s"),
+ },
+ 2: {
+ genLR0Item("s", 1, "foo", "bar"),
+ },
+ 3: {
+ genLR0Item("s", 2, "foo", "bar"),
+ },
+ 4: {
+ genLR0Item("bar", 1, "b"),
+ },
+ }
+
+ expectedStates := []*expectedLRState{
+ {
+ kernelItems: expectedKernels[0],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("s"): expectedKernels[1],
+ genSym("foo"): expectedKernels[2],
+ },
+ reducibleProds: []*production{
+ genProd("foo"),
+ },
+ emptyProdItems: []*lrItem{
+ genLR0Item("foo", 0),
+ },
+ },
+ {
+ kernelItems: expectedKernels[1],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s'", "s"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ nextStates: map[symbol.Symbol][]*lrItem{
+ genSym("bar"): expectedKernels[3],
+ genSym("b"): expectedKernels[4],
+ },
+ reducibleProds: []*production{
+ genProd("bar"),
+ },
+ emptyProdItems: []*lrItem{
+ genLR0Item("bar", 0),
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("s", "foo", "bar"),
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ nextStates: map[symbol.Symbol][]*lrItem{},
+ reducibleProds: []*production{
+ genProd("bar", "b"),
+ },
+ },
+ }
+
+ testLRAutomaton(t, expectedStates, automaton)
+}
+
+func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) {
+ if len(automaton.states) != len(expected) {
+ t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states))
+ }
+
+ for i, eState := range expected {
+ t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("a kernel was not found: %v", k.id)
+ }
+
+ // test look-ahead symbols
+ {
+ if len(state.kernel.items) != len(eState.kernelItems) {
+				t.Errorf("kernel item count is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items))
+ }
+ for _, eKItem := range eState.kernelItems {
+ var kItem *lrItem
+ for _, it := range state.kernel.items {
+ if it.id != eKItem.id {
+ continue
+ }
+ kItem = it
+ break
+ }
+ if kItem == nil {
+					t.Fatalf("kernel item not found: %v", eKItem.id)
+ }
+
+ if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
+ t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
+ }
+
+ for eSym := range eKItem.lookAhead.symbols {
+ if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
+ t.Errorf("look-ahead symbol not found: %v", eSym)
+ }
+ }
+ }
+ }
+
+ // test next states
+ {
+ if len(state.next) != len(eState.nextStates) {
+				t.Errorf("next state count is mismatched; want: %v, got: %v", len(eState.nextStates), len(state.next))
+ }
+ for eSym, eKItems := range eState.nextStates {
+ nextStateKernel, err := newKernel(eKItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ nextState, ok := state.next[eSym]
+ if !ok {
+ t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
+ }
+ if nextState != nextStateKernel.id {
+ t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
+ }
+ }
+ }
+
+ // test reducible productions
+ {
+ if len(state.reducible) != len(eState.reducibleProds) {
+ t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
+ }
+ for _, eProd := range eState.reducibleProds {
+ if _, ok := state.reducible[eProd.id]; !ok {
+ t.Errorf("reducible production was not found: %v", eProd.id)
+ }
+ }
+
+ if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+ t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+ }
+ for _, eItem := range eState.emptyProdItems {
+ found := false
+ for _, item := range state.emptyProdItems {
+ if item.id != eItem.id {
+ continue
+ }
+ found = true
+ break
+ }
+ if !found {
+ t.Errorf("empty production item not found: %v", eItem.id)
+ }
+ }
+ }
+ })
+ }
+}
+
+type expectedState struct {
+ kernelItems []*lrItem
+ acts map[symbol.Symbol]testActionEntry
+ goTos map[symbol.Symbol][]*lrItem
+}
+
+func TestGenLALRParsingTable(t *testing.T) {
+ src := `
+#name test;
+
+s: l eq r | r;
+l: ref r | id;
+r: l;
+eq: '=';
+ref: '*';
+id: "[A-Za-z0-9_]+";
+`
+
+ var ptab *ParsingTable
+ var automaton *lalr1Automaton
+ var gram *Grammar
+ var nonTermCount int
+ var termCount int
+ {
+ ast, err := parser.Parse(strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ b := GrammarBuilder{
+ AST: ast,
+ }
+ gram, err = b.build()
+ if err != nil {
+ t.Fatal(err)
+ }
+ first, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ t.Fatal(err)
+ }
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
+ if err != nil {
+ t.Fatal(err)
+ }
+ automaton, err = genLALR1Automaton(lr0, gram.productionSet, first)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ nonTermTexts, err := gram.symbolTable.NonTerminalTexts()
+ if err != nil {
+ t.Fatal(err)
+ }
+ termTexts, err := gram.symbolTable.TerminalTexts()
+ if err != nil {
+ t.Fatal(err)
+ }
+ nonTermCount = len(nonTermTexts)
+ termCount = len(termTexts)
+
+ lalr := &lrTableBuilder{
+ automaton: automaton.lr0Automaton,
+ prods: gram.productionSet,
+ termCount: termCount,
+ nonTermCount: nonTermCount,
+ symTab: gram.symbolTable,
+ }
+ ptab, err = lalr.build()
+ if err != nil {
+ t.Fatalf("failed to create a LALR parsing table: %v", err)
+ }
+ if ptab == nil {
+ t.Fatal("genLALRParsingTable returns nil without any error")
+ }
+ }
+
+ genSym := newTestSymbolGenerator(t, gram.symbolTable)
+ genProd := newTestProductionGenerator(t, genSym)
+ genLR0Item := newTestLR0ItemGenerator(t, genProd)
+
+ expectedKernels := map[int][]*lrItem{
+ 0: {
+ withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
+ },
+ 1: {
+ withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
+ },
+ 2: {
+ withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
+ },
+ 3: {
+ withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
+ },
+ 4: {
+ withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 5: {
+ withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 6: {
+ withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ 7: {
+ withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 8: {
+ withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
+ },
+ 9: {
+ withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
+ },
+ }
+
+ expectedStates := []expectedState{
+ {
+ kernelItems: expectedKernels[0],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("ref"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol.Symbol][]*lrItem{
+ genSym("s"): expectedKernels[1],
+ genSym("l"): expectedKernels[2],
+ genSym("r"): expectedKernels[3],
+ },
+ },
+ {
+ kernelItems: expectedKernels[1],
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("s'", "s"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[2],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[6],
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("r", "l"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[3],
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("s", "r"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[4],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("ref"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol.Symbol][]*lrItem{
+ genSym("r"): expectedKernels[7],
+ genSym("l"): expectedKernels[8],
+ },
+ },
+ {
+ kernelItems: expectedKernels[5],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeReduce,
+ production: genProd("l", "id"),
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("l", "id"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[6],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("ref"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[4],
+ },
+ genSym("id"): {
+ ty: ActionTypeShift,
+ nextState: expectedKernels[5],
+ },
+ },
+ goTos: map[symbol.Symbol][]*lrItem{
+ genSym("l"): expectedKernels[8],
+ genSym("r"): expectedKernels[9],
+ },
+ },
+ {
+ kernelItems: expectedKernels[7],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeReduce,
+ production: genProd("l", "ref", "r"),
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("l", "ref", "r"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[8],
+ acts: map[symbol.Symbol]testActionEntry{
+ genSym("eq"): {
+ ty: ActionTypeReduce,
+ production: genProd("r", "l"),
+ },
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("r", "l"),
+ },
+ },
+ },
+ {
+ kernelItems: expectedKernels[9],
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
+ ty: ActionTypeReduce,
+ production: genProd("s", "l", "eq", "r"),
+ },
+ },
+ },
+ }
+
+ t.Run("initial state", func(t *testing.T) {
+ iniState := findStateByNum(automaton.states, ptab.InitialState)
+ if iniState == nil {
+ t.Fatalf("the initial state was not found: #%v", ptab.InitialState)
+ }
+ eIniState, err := newKernel(expectedKernels[0])
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ if iniState.id != eIniState.id {
+ t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id)
+ }
+ })
+
+ for i, eState := range expectedStates {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ k, err := newKernel(eState.kernelItems)
+ if err != nil {
+ t.Fatalf("failed to create a kernel item: %v", err)
+ }
+ state, ok := automaton.states[k.id]
+ if !ok {
+ t.Fatalf("state was not found: #%v", 0)
+ }
+
+ testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount)
+ testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount)
+ })
+ }
+}
+
+func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) {
+ nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
+ for eSym, eAct := range expectedState.acts {
+ nonEmptyEntries[eSym.Num()] = struct{}{}
+
+ ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num())
+ if ty != eAct.ty {
+ t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
+ }
+ switch eAct.ty {
+ case ActionTypeShift:
+ eNextState, err := newKernel(eAct.nextState)
+ if err != nil {
+ t.Fatal(err)
+ }
+ nextState := findStateByNum(automaton.states, stateNum)
+ if nextState == nil {
+ t.Fatalf("state was not found; state: #%v", stateNum)
+ }
+ if nextState.id != eNextState.id {
+ t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+ }
+ case ActionTypeReduce:
+ prod := findProductionByNum(gram.productionSet, prodNum)
+ if prod == nil {
+ t.Fatalf("production was not found: #%v", prodNum)
+ }
+ if prod.id != eAct.production.id {
+ t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
+ }
+ }
+ }
+ for symNum := 0; symNum < termCount; symNum++ {
+ if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
+ continue
+ }
+ ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum))
+ if ty != ActionTypeError {
+			t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, production: #%v", state.num, symNum, ty, stateNum, prodNum)
+ }
+ }
+}
+
+func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) {
+ nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
+ for eSym, eGoTo := range expectedState.goTos {
+ nonEmptyEntries[eSym.Num()] = struct{}{}
+
+ eNextState, err := newKernel(eGoTo)
+ if err != nil {
+ t.Fatal(err)
+ }
+ ty, stateNum := ptab.getGoTo(state.num, eSym.Num())
+ if ty != GoToTypeRegistered {
+ t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
+ }
+ nextState := findStateByNum(automaton.states, stateNum)
+ if nextState == nil {
+ t.Fatalf("state was not found: #%v", stateNum)
+ }
+ if nextState.id != eNextState.id {
+ t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+ }
+ }
+ for symNum := 0; symNum < nonTermCount; symNum++ {
+ if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
+ continue
+ }
+ ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum))
+ if ty != GoToTypeError {
+ t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
+ }
+ }
+}
+
+type testActionEntry struct {
+ ty ActionType
+ nextState []*lrItem
+ production *production
+}
+
+func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState {
+ for _, state := range states {
+ if state.num == num {
+ return state
+ }
+ }
+ return nil
+}
+
+func findProductionByNum(prods *productionSet, num productionNum) *production {
+ for _, prod := range prods.getAllProductions() {
+ if prod.num == num {
+ return prod
+ }
+ }
+ return nil
+}
+
+type testSymbolGenerator func(text string) symbol.Symbol
+
+func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
+ return func(text string) symbol.Symbol {
+ t.Helper()
+
+ sym, ok := symTab.ToSymbol(text)
+ if !ok {
+ t.Fatalf("symbol was not found: %v", text)
+ }
+ return sym
+ }
+}
+
+type testProductionGenerator func(lhs string, rhs ...string) *production
+
+func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
+ return func(lhs string, rhs ...string) *production {
+ t.Helper()
+
+ rhsSym := []symbol.Symbol{}
+ for _, text := range rhs {
+ rhsSym = append(rhsSym, genSym(text))
+ }
+ prod, err := newProduction(genSym(lhs), rhsSym)
+ if err != nil {
+ t.Fatalf("failed to create a production: %v", err)
+ }
+
+ return prod
+ }
+}
+
+type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem
+
+func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator {
+ return func(lhs string, dot int, rhs ...string) *lrItem {
+ t.Helper()
+
+ prod := genProd(lhs, rhs...)
+ item, err := newLR0Item(prod, dot)
+ if err != nil {
+ t.Fatalf("failed to create a LR0 item: %v", err)
+ }
+
+ return item
+ }
+}
+
+func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem {
+ if item.lookAhead.symbols == nil {
+ item.lookAhead.symbols = map[symbol.Symbol]struct{}{}
+ }
+
+ for _, a := range lookAhead {
+ item.lookAhead.symbols[a] = struct{}{}
+ }
+
+ return item
+}
diff --git a/tests/unit/grammar/first_test.go b/tests/unit/grammar/first_test.go
deleted file mode 100644
index 9625ef6..0000000
--- a/tests/unit/grammar/first_test.go
+++ /dev/null
@@ -1,219 +0,0 @@
-package grammar
-
-import (
- "strings"
- "testing"
-
- "urubu/grammar/symbol"
- "urubu/spec/grammar/parser"
-)
-
-type first struct {
- lhs string
- num int
- dot int
- symbols []string
- empty bool
-}
-
-func TestGenFirst(t *testing.T) {
- tests := []struct {
- caption string
- src string
- first []first
- }{
- {
- caption: "productions contain only non-empty productions",
- src: `
-#name test;
-
-expr
- : expr add term
- | term
- ;
-term
- : term mul factor
- | factor
- ;
-factor
- : l_paren expr r_paren
- | id
- ;
-add: "\+";
-mul: "\*";
-l_paren: "\(";
-r_paren: "\)";
-id: "[A-Za-z_][0-9A-Za-z_]*";
-`,
- first: []first{
- {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}},
- {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
- {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}},
- {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
- {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}},
- {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}},
- {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}},
- {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}},
- },
- },
- {
- caption: "productions contain the empty start production",
- src: `
-#name test;
-
-s
- :
- ;
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true},
- {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true},
- },
- },
- {
- caption: "productions contain an empty production",
- src: `
-#name test;
-
-s
- : foo bar
- ;
-foo
- :
- ;
-bar: "bar";
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
- {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
- {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true},
- },
- },
- {
- caption: "a start production contains a non-empty alternative and empty alternative",
- src: `
-#name test;
-
-s
- : foo
- |
- ;
-foo: "foo";
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true},
- {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}},
- {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true},
- },
- },
- {
- caption: "a production contains non-empty alternative and empty alternative",
- src: `
-#name test;
-
-s
- : foo
- ;
-foo
- : bar
- |
- ;
-bar: "bar";
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
- {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
- {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}},
- {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true},
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- fst, gram := genActualFirst(t, tt.src)
-
- for _, ttFirst := range tt.first {
- lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
- if !ok {
- t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
- }
-
- prod, ok := gram.productionSet.findByLHS(lhsSym)
- if !ok {
- t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
- }
-
- actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
- if err != nil {
- t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
- }
-
- expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
-
- testFirst(t, actualFirst, expectedFirst)
- }
- })
- }
-}
-
-func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err := b.build()
- if err != nil {
- t.Fatal(err)
- }
- fst, err := genFirstSet(gram.productionSet)
- if err != nil {
- t.Fatal(err)
- }
- if fst == nil {
- t.Fatal("genFiest returned nil without any error")
- }
-
- return fst, gram
-}
-
-func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
- t.Helper()
-
- entry := newFirstEntry()
- if empty {
- entry.addEmpty()
- }
- for _, sym := range symbols {
- symSym, ok := symTab.ToSymbol(sym)
- if !ok {
- t.Fatalf("a symbol was not found; symbol: %v", sym)
- }
- entry.add(symSym)
- }
-
- return entry
-}
-
-func testFirst(t *testing.T, actual, expected *firstEntry) {
- if actual.empty != expected.empty {
- t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
- }
-
- if len(actual.symbols) != len(expected.symbols) {
- t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
- }
-
- for eSym := range expected.symbols {
- if _, ok := actual.symbols[eSym]; !ok {
- t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
- }
- }
-}
diff --git a/tests/unit/grammar/lalr1_test.go b/tests/unit/grammar/lalr1_test.go
deleted file mode 100644
index fd09333..0000000
--- a/tests/unit/grammar/lalr1_test.go
+++ /dev/null
@@ -1,187 +0,0 @@
-package grammar
-
-import (
- "strings"
- "testing"
-
- "urubu/grammar/symbol"
- "urubu/spec/grammar/parser"
-)
-
-func TestGenLALR1Automaton(t *testing.T) {
- // This grammar belongs to LALR(1) class, not SLR(1).
- src := `
-#name test;
-
-s: l eq r | r;
-l: ref r | id;
-r: l;
-eq: '=';
-ref: '*';
-id: "[A-Za-z0-9_]+";
-`
-
- var gram *Grammar
- var automaton *lalr1Automaton
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
-
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatalf("failed to create a LR0 automaton: %v", err)
- }
-
- firstSet, err := genFirstSet(gram.productionSet)
- if err != nil {
- t.Fatalf("failed to create a FIRST set: %v", err)
- }
-
- automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet)
- if err != nil {
- t.Fatalf("failed to create a LALR1 automaton: %v", err)
- }
- if automaton == nil {
- t.Fatalf("genLALR1Automaton returns nil without any error")
- }
- }
-
- initialState := automaton.states[automaton.initialState]
- if initialState == nil {
- t.Errorf("failed to get an initial status: %v", automaton.initialState)
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
- },
- 1: {
- withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
- },
- 2: {
- withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
- withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
- },
- 3: {
- withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
- },
- 4: {
- withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 5: {
- withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
- },
- 6: {
- withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
- },
- 7: {
- withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 8: {
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
- },
- 9: {
- withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
- },
- }
-
- expectedStates := []*expectedLRState{
- {
- kernelItems: expectedKernels[0],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("s"): expectedKernels[1],
- genSym("l"): expectedKernels[2],
- genSym("r"): expectedKernels[3],
- genSym("ref"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[1],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s'", "s"),
- },
- },
- {
- kernelItems: expectedKernels[2],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("eq"): expectedKernels[6],
- },
- reducibleProds: []*production{
- genProd("r", "l"),
- },
- },
- {
- kernelItems: expectedKernels[3],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s", "r"),
- },
- },
- {
- kernelItems: expectedKernels[4],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("r"): expectedKernels[7],
- genSym("l"): expectedKernels[8],
- genSym("ref"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[5],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("l", "id"),
- },
- },
- {
- kernelItems: expectedKernels[6],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("r"): expectedKernels[9],
- genSym("l"): expectedKernels[8],
- genSym("ref"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[7],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("l", "ref", "r"),
- },
- },
- {
- kernelItems: expectedKernels[8],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("r", "l"),
- },
- },
- {
- kernelItems: expectedKernels[9],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s", "l", "eq", "r"),
- },
- },
- }
-
- testLRAutomaton(t, expectedStates, automaton.lr0Automaton)
-}
diff --git a/tests/unit/grammar/lexical/compiler_test.go b/tests/unit/grammar/lexical.go
index b621cd2..b621cd2 100644
--- a/tests/unit/grammar/lexical/compiler_test.go
+++ b/tests/unit/grammar/lexical.go
diff --git a/tests/unit/grammar/lexical/dfa/tree_test.go b/tests/unit/grammar/lexical/dfa.go
index de3ebbb..1a3e16a 100644
--- a/tests/unit/grammar/lexical/dfa/tree_test.go
+++ b/tests/unit/grammar/lexical/dfa.go
@@ -9,6 +9,191 @@ import (
spec "urubu/spec/grammar"
)
+func TestGenDFA(t *testing.T) {
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ dfa := GenDFA(bt, symTab)
+ if dfa == nil {
+ t.Fatalf("DFA is nil")
+ }
+
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
+ s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
+ s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
+ s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
+
+ rune2Int := func(char rune, index int) uint8 {
+ return uint8([]byte(string(char))[index])
+ }
+
+ tranS0 := [256]string{}
+ tranS0[rune2Int('a', 0)] = s1.hash()
+ tranS0[rune2Int('b', 0)] = s0.hash()
+
+ tranS1 := [256]string{}
+ tranS1[rune2Int('a', 0)] = s1.hash()
+ tranS1[rune2Int('b', 0)] = s2.hash()
+
+ tranS2 := [256]string{}
+ tranS2[rune2Int('a', 0)] = s1.hash()
+ tranS2[rune2Int('b', 0)] = s3.hash()
+
+ tranS3 := [256]string{}
+ tranS3[rune2Int('a', 0)] = s1.hash()
+ tranS3[rune2Int('b', 0)] = s0.hash()
+
+ expectedTranTab := map[string][256]string{
+ s0.hash(): tranS0,
+ s1.hash(): tranS1,
+ s2.hash(): tranS2,
+ s3.hash(): tranS3,
+ }
+ if len(dfa.TransitionTable) != len(expectedTranTab) {
+ t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
+ }
+ for h, eTranTab := range expectedTranTab {
+ tranTab, ok := dfa.TransitionTable[h]
+ if !ok {
+ t.Errorf("no entry; hash: %v", h)
+ continue
+ }
+ if len(tranTab) != len(eTranTab) {
+ t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
+ }
+ for c, eNext := range eTranTab {
+ if eNext == "" {
+ continue
+ }
+
+ next := tranTab[c]
+ if next == "" {
+ t.Errorf("no enatry: hash: %v, char: %v", h, c)
+ }
+ if next != eNext {
+ t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next)
+ }
+ }
+ }
+
+ if dfa.InitialState != s0.hash() {
+ t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState)
+ }
+
+ accTab := map[string]spec.LexModeKindID{
+ s3.hash(): 1,
+ }
+ if len(dfa.AcceptingStatesTable) != len(accTab) {
+ t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable))
+ }
+ for eState, eID := range accTab {
+ id, ok := dfa.AcceptingStatesTable[eState]
+ if !ok {
+ t.Errorf("accepting state is not found: state: %v", eState)
+ }
+ if id != eID {
+ t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id)
+ }
+ }
+}
+
+func TestNewSymbolPosition(t *testing.T) {
+ tests := []struct {
+ n uint16
+ endMark bool
+ err bool
+ }{
+ {
+ n: 0,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: 0,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: true,
+ err: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
+ pos, err := newSymbolPosition(tt.n, tt.endMark)
+ if tt.err {
+ if err == nil {
+ t.Fatal("err is nil")
+ }
+ return
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, endMark := pos.describe()
+ if n != tt.n || endMark != tt.endMark {
+ t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
+ }
+ })
+ }
+}
+
func TestByteTree(t *testing.T) {
tests := []struct {
root byteTree
diff --git a/tests/unit/grammar/lexical/dfa/dfa_test.go b/tests/unit/grammar/lexical/dfa/dfa_test.go
deleted file mode 100644
index 38577cf..0000000
--- a/tests/unit/grammar/lexical/dfa/dfa_test.go
+++ /dev/null
@@ -1,121 +0,0 @@
-package dfa
-
-import (
- "strings"
- "testing"
-
- "urubu/grammar/lexical/parser"
- spec "urubu/spec/grammar"
-)
-
-func TestGenDFA(t *testing.T) {
- p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
- cpt, err := p.Parse()
- if err != nil {
- t.Fatal(err)
- }
- bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
- spec.LexModeKindIDMin: cpt,
- })
- if err != nil {
- t.Fatal(err)
- }
- dfa := GenDFA(bt, symTab)
- if dfa == nil {
- t.Fatalf("DFA is nil")
- }
-
- symPos := func(n uint16) symbolPosition {
- pos, err := newSymbolPosition(n, false)
- if err != nil {
- panic(err)
- }
- return pos
- }
-
- endPos := func(n uint16) symbolPosition {
- pos, err := newSymbolPosition(n, true)
- if err != nil {
- panic(err)
- }
- return pos
- }
-
- s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
- s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
- s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
- s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
-
- rune2Int := func(char rune, index int) uint8 {
- return uint8([]byte(string(char))[index])
- }
-
- tranS0 := [256]string{}
- tranS0[rune2Int('a', 0)] = s1.hash()
- tranS0[rune2Int('b', 0)] = s0.hash()
-
- tranS1 := [256]string{}
- tranS1[rune2Int('a', 0)] = s1.hash()
- tranS1[rune2Int('b', 0)] = s2.hash()
-
- tranS2 := [256]string{}
- tranS2[rune2Int('a', 0)] = s1.hash()
- tranS2[rune2Int('b', 0)] = s3.hash()
-
- tranS3 := [256]string{}
- tranS3[rune2Int('a', 0)] = s1.hash()
- tranS3[rune2Int('b', 0)] = s0.hash()
-
- expectedTranTab := map[string][256]string{
- s0.hash(): tranS0,
- s1.hash(): tranS1,
- s2.hash(): tranS2,
- s3.hash(): tranS3,
- }
- if len(dfa.TransitionTable) != len(expectedTranTab) {
- t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
- }
- for h, eTranTab := range expectedTranTab {
- tranTab, ok := dfa.TransitionTable[h]
- if !ok {
- t.Errorf("no entry; hash: %v", h)
- continue
- }
- if len(tranTab) != len(eTranTab) {
- t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
- }
- for c, eNext := range eTranTab {
- if eNext == "" {
- continue
- }
-
- next := tranTab[c]
- if next == "" {
- t.Errorf("no enatry: hash: %v, char: %v", h, c)
- }
- if next != eNext {
- t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next)
- }
- }
- }
-
- if dfa.InitialState != s0.hash() {
- t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState)
- }
-
- accTab := map[string]spec.LexModeKindID{
- s3.hash(): 1,
- }
- if len(dfa.AcceptingStatesTable) != len(accTab) {
- t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable))
- }
- for eState, eID := range accTab {
- id, ok := dfa.AcceptingStatesTable[eState]
- if !ok {
- t.Errorf("accepting state is not found: state: %v", eState)
- }
- if id != eID {
- t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id)
- }
- }
-}
diff --git a/tests/unit/grammar/lexical/dfa/symbol_position_test.go b/tests/unit/grammar/lexical/dfa/symbol_position_test.go
deleted file mode 100644
index c867f64..0000000
--- a/tests/unit/grammar/lexical/dfa/symbol_position_test.go
+++ /dev/null
@@ -1,79 +0,0 @@
-package dfa
-
-import (
- "fmt"
- "testing"
-)
-
-func TestNewSymbolPosition(t *testing.T) {
- tests := []struct {
- n uint16
- endMark bool
- err bool
- }{
- {
- n: 0,
- endMark: false,
- err: true,
- },
- {
- n: 0,
- endMark: true,
- err: true,
- },
- {
- n: symbolPositionMin - 1,
- endMark: false,
- err: true,
- },
- {
- n: symbolPositionMin - 1,
- endMark: true,
- err: true,
- },
- {
- n: symbolPositionMin,
- endMark: false,
- },
- {
- n: symbolPositionMin,
- endMark: true,
- },
- {
- n: symbolPositionMax,
- endMark: false,
- },
- {
- n: symbolPositionMax,
- endMark: true,
- },
- {
- n: symbolPositionMax + 1,
- endMark: false,
- err: true,
- },
- {
- n: symbolPositionMax + 1,
- endMark: true,
- err: true,
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
- pos, err := newSymbolPosition(tt.n, tt.endMark)
- if tt.err {
- if err == nil {
- t.Fatal("err is nil")
- }
- return
- }
- if err != nil {
- t.Fatal(err)
- }
- n, endMark := pos.describe()
- if n != tt.n || endMark != tt.endMark {
- t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
- }
- })
- }
-}
diff --git a/tests/unit/grammar/lexical/parser/parser_test.go b/tests/unit/grammar/lexical/parser.go
index 4c9557d..d5d7039 100644
--- a/tests/unit/grammar/lexical/parser/parser_test.go
+++ b/tests/unit/grammar/lexical/parser.go
@@ -10,6 +10,524 @@ import (
"urubu/ucd"
)
+func TestLexer(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ tokens []*token
+ err error
+ }{
+ {
+ caption: "lexer can recognize ordinaly characters",
+ src: "123abcいろは",
+ tokens: []*token{
+ newToken(tokenKindChar, '1'),
+ newToken(tokenKindChar, '2'),
+ newToken(tokenKindChar, '3'),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, 'b'),
+ newToken(tokenKindChar, 'c'),
+ newToken(tokenKindChar, 'い'),
+ newToken(tokenKindChar, 'ろ'),
+ newToken(tokenKindChar, 'は'),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters in default mode",
+ src: ".*+?|()[\\u",
+ tokens: []*token{
+ newToken(tokenKindAnyChar, nullChar),
+ newToken(tokenKindRepeat, nullChar),
+ newToken(tokenKindRepeatOneOrMore, nullChar),
+ newToken(tokenKindOption, nullChar),
+ newToken(tokenKindAlt, nullChar),
+ newToken(tokenKindGroupOpen, nullChar),
+ newToken(tokenKindGroupClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the escape sequences in default mode",
+ src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
+ tokens: []*token{
+ newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
+ newToken(tokenKindChar, '*'),
+ newToken(tokenKindChar, '+'),
+ newToken(tokenKindChar, '?'),
+ newToken(tokenKindChar, '|'),
+ newToken(tokenKindChar, '('),
+ newToken(tokenKindChar, ')'),
+ newToken(tokenKindChar, '['),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "], {, and } are treated as an ordinary character in default mode",
+ src: "]{}",
+ tokens: []*token{
+ newToken(tokenKindChar, ']'),
+ newToken(tokenKindChar, '{'),
+ newToken(tokenKindChar, '}'),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters in bracket expression mode",
+ src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("09AF"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("09abcf"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the escape sequences in bracket expression mode",
+ src: "[\\^a\\-z]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "in a bracket expression, the special characters are also handled as normal characters",
+ src: "[\\\\.*+?|()[",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
+ newToken(tokenKindChar, '*'),
+ newToken(tokenKindChar, '+'),
+ newToken(tokenKindChar, '?'),
+ newToken(tokenKindChar, '|'),
+ newToken(tokenKindChar, '('),
+ newToken(tokenKindChar, ')'),
+ newToken(tokenKindChar, '['),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
+ // [...-...][...-][-...][-]
+ // ~~~~~~~ ~ ~ ~
+ // ^ ^ ^ ^
+ // | | | `-- Ordinary Character (b)
+ // | | `-- Ordinary Character (b)
+ // | `-- Ordinary Character (b)
+ // `-- Character Range (a)
+ //
+ // a. *-* is handled as a character-range expression.
+ // b. *-, -*, or - are handled as ordinary characters.
+ src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
+ // [^...^...][^]
+ // ~~ ~ ~~
+ // ^ ^ ^^
+ // | | |`-- Ordinary Character (c)
+ // | | `-- Bracket Expression
+ // | `-- Ordinary Character (b)
+ // `-- Inverse Bracket Expression (a)
+ //
+ // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
+ // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols.
+ // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
+ src: "[^^][^]",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer raises an error when an invalid escape sequence appears",
+ src: "\\@",
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+ src: "\\",
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "lexer raises an error when an invalid escape sequence appears",
+ src: "[\\@",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+ src: "[\\",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "lexer can recognize the special characters and code points in code point expression mode",
+ src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a one digit hex string isn't a valid code point",
+ src: "\\u{0",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a two digits hex string isn't a valid code point",
+ src: "\\u{01",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a three digits hex string isn't a valid code point",
+ src: "\\u{012",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a four digits hex string is a valid code point",
+ src: "\\u{0123}",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ },
+ },
+ {
+ caption: "a five digits hex string isn't a valid code point",
+ src: "\\u{01234",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a six digits hex string is a valid code point",
+ src: "\\u{012345}",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("012345"),
+ newToken(tokenKindRBrace, nullChar),
+ },
+ },
+ {
+ caption: "a seven digits hex string isn't a valid code point",
+ src: "\\u{0123456",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a code point must be hex digits",
+ src: "\\u{g",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a code point must be hex digits",
+ src: "\\u{G",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "lexer can recognize the special characters and symbols in character property expression mode",
+ src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
+ tokens: []*token{
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters and symbols in fragment expression mode",
+ src: "\\f{integer}",
+ tokens: []*token{
+ newToken(tokenKindFragmentLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newFragmentSymbolToken("integer"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a fragment expression is not supported in a bracket expression",
+ src: "[\\f",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "a fragment expression is not supported in an inverse bracket expression",
+ src: "[^\\f",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ lex := newLexer(strings.NewReader(tt.src))
+ var err error
+ var tok *token
+ i := 0
+ for {
+ tok, err = lex.next()
+ if err != nil {
+ break
+ }
+ if i >= len(tt.tokens) {
+ break
+ }
+ eTok := tt.tokens[i]
+ i++
+ testToken(t, tok, eTok)
+
+ if tok.kind == tokenKindEOF {
+ break
+ }
+ }
+ if tt.err != nil {
+ if err != ParseErr {
+ t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
+ }
+ detail, cause := lex.error()
+ if cause != tt.err {
+ t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ }
+ if i < len(tt.tokens) {
+ t.Fatalf("expecte more tokens")
+ }
+ })
+ }
+}
+
+func testToken(t *testing.T, a, e *token) {
+ t.Helper()
+ if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
+ t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
+ }
+}
+
func TestParse(t *testing.T) {
tests := []struct {
pattern string
diff --git a/tests/unit/grammar/lexical/parser/lexer_test.go b/tests/unit/grammar/lexical/parser/lexer_test.go
deleted file mode 100644
index 055466e..0000000
--- a/tests/unit/grammar/lexical/parser/lexer_test.go
+++ /dev/null
@@ -1,524 +0,0 @@
-package parser
-
-import (
- "strings"
- "testing"
-)
-
-func TestLexer(t *testing.T) {
- tests := []struct {
- caption string
- src string
- tokens []*token
- err error
- }{
- {
- caption: "lexer can recognize ordinaly characters",
- src: "123abcいろは",
- tokens: []*token{
- newToken(tokenKindChar, '1'),
- newToken(tokenKindChar, '2'),
- newToken(tokenKindChar, '3'),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, 'b'),
- newToken(tokenKindChar, 'c'),
- newToken(tokenKindChar, 'い'),
- newToken(tokenKindChar, 'ろ'),
- newToken(tokenKindChar, 'は'),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the special characters in default mode",
- src: ".*+?|()[\\u",
- tokens: []*token{
- newToken(tokenKindAnyChar, nullChar),
- newToken(tokenKindRepeat, nullChar),
- newToken(tokenKindRepeatOneOrMore, nullChar),
- newToken(tokenKindOption, nullChar),
- newToken(tokenKindAlt, nullChar),
- newToken(tokenKindGroupOpen, nullChar),
- newToken(tokenKindGroupClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the escape sequences in default mode",
- src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
- tokens: []*token{
- newToken(tokenKindChar, '\\'),
- newToken(tokenKindChar, '.'),
- newToken(tokenKindChar, '*'),
- newToken(tokenKindChar, '+'),
- newToken(tokenKindChar, '?'),
- newToken(tokenKindChar, '|'),
- newToken(tokenKindChar, '('),
- newToken(tokenKindChar, ')'),
- newToken(tokenKindChar, '['),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "], {, and } are treated as an ordinary character in default mode",
- src: "]{}",
- tokens: []*token{
- newToken(tokenKindChar, ']'),
- newToken(tokenKindChar, '{'),
- newToken(tokenKindChar, '}'),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the special characters in bracket expression mode",
- src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("09AF"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("09abcf"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the escape sequences in bracket expression mode",
- src: "[\\^a\\-z]",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '^'),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "in a bracket expression, the special characters are also handled as normal characters",
- src: "[\\\\.*+?|()[",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '\\'),
- newToken(tokenKindChar, '.'),
- newToken(tokenKindChar, '*'),
- newToken(tokenKindChar, '+'),
- newToken(tokenKindChar, '?'),
- newToken(tokenKindChar, '|'),
- newToken(tokenKindChar, '('),
- newToken(tokenKindChar, ')'),
- newToken(tokenKindChar, '['),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
- // [...-...][...-][-...][-]
- // ~~~~~~~ ~ ~ ~
- // ^ ^ ^ ^
- // | | | `-- Ordinary Character (b)
- // | | `-- Ordinary Character (b)
- // | `-- Ordinary Character (b)
- // `-- Character Range (a)
- //
- // a. *-* is handled as a character-range expression.
- // b. *-, -*, or - are handled as ordinary characters.
- src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
- // [^...^...][^]
- // ~~ ~ ~~
- // ^ ^ ^^
- // | | |`-- Ordinary Character (c)
- // | | `-- Bracket Expression
- // | `-- Ordinary Character (b)
- // `-- Inverse Bracket Expression (a)
- //
- // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
- // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols.
- // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
- src: "[^^][^]",
- tokens: []*token{
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '^'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '^'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer raises an error when an invalid escape sequence appears",
- src: "\\@",
- err: synErrInvalidEscSeq,
- },
- {
- caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
- src: "\\",
- err: synErrIncompletedEscSeq,
- },
- {
- caption: "lexer raises an error when an invalid escape sequence appears",
- src: "[\\@",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- },
- err: synErrInvalidEscSeq,
- },
- {
- caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
- src: "[\\",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- },
- err: synErrIncompletedEscSeq,
- },
- {
- caption: "lexer can recognize the special characters and code points in code point expression mode",
- src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("4567"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("89abcd"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("efAB"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("CDEF01"),
- newToken(tokenKindRBrace, nullChar),
-
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("4567"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("89abcd"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("efAB"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("CDEF01"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("4567"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("89abcd"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("efAB"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("CDEF01"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "a one digit hex string isn't a valid code point",
- src: "\\u{0",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a two digits hex string isn't a valid code point",
- src: "\\u{01",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a three digits hex string isn't a valid code point",
- src: "\\u{012",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a four digits hex string is a valid code point",
- src: "\\u{0123}",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- },
- },
- {
- caption: "a five digits hex string isn't a valid code point",
- src: "\\u{01234",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a six digits hex string is a valid code point",
- src: "\\u{012345}",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("012345"),
- newToken(tokenKindRBrace, nullChar),
- },
- },
- {
- caption: "a seven digits hex string isn't a valid code point",
- src: "\\u{0123456",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a code point must be hex digits",
- src: "\\u{g",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a code point must be hex digits",
- src: "\\u{G",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "lexer can recognize the special characters and symbols in character property expression mode",
- src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
- tokens: []*token{
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("General_Category"),
- newToken(tokenKindEqual, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
-
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("General_Category"),
- newToken(tokenKindEqual, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("General_Category"),
- newToken(tokenKindEqual, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the special characters and symbols in fragment expression mode",
- src: "\\f{integer}",
- tokens: []*token{
- newToken(tokenKindFragmentLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newFragmentSymbolToken("integer"),
- newToken(tokenKindRBrace, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "a fragment expression is not supported in a bracket expression",
- src: "[\\f",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- },
- err: synErrInvalidEscSeq,
- },
- {
- caption: "a fragment expression is not supported in an inverse bracket expression",
- src: "[^\\f",
- tokens: []*token{
- newToken(tokenKindInverseBExpOpen, nullChar),
- },
- err: synErrInvalidEscSeq,
- },
- }
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- lex := newLexer(strings.NewReader(tt.src))
- var err error
- var tok *token
- i := 0
- for {
- tok, err = lex.next()
- if err != nil {
- break
- }
- if i >= len(tt.tokens) {
- break
- }
- eTok := tt.tokens[i]
- i++
- testToken(t, tok, eTok)
-
- if tok.kind == tokenKindEOF {
- break
- }
- }
- if tt.err != nil {
- if err != ParseErr {
- t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
- }
- detail, cause := lex.error()
- if cause != tt.err {
- t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
- }
- } else {
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- }
- if i < len(tt.tokens) {
- t.Fatalf("expecte more tokens")
- }
- })
- }
-}
-
-func testToken(t *testing.T, a, e *token) {
- t.Helper()
- if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
- t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
- }
-}
diff --git a/tests/unit/grammar/lr0_test.go b/tests/unit/grammar/lr0_test.go
deleted file mode 100644
index 0a9ec24..0000000
--- a/tests/unit/grammar/lr0_test.go
+++ /dev/null
@@ -1,448 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "urubu/grammar/symbol"
- "urubu/spec/grammar/parser"
-)
-
-type expectedLRState struct {
- kernelItems []*lrItem
- nextStates map[symbol.Symbol][]*lrItem
- reducibleProds []*production
- emptyProdItems []*lrItem
-}
-
-func TestGenLR0Automaton(t *testing.T) {
- src := `
-#name test;
-
-expr
- : expr add term
- | term
- ;
-term
- : term mul factor
- | factor
- ;
-factor
- : l_paren expr r_paren
- | id
- ;
-add: "\+";
-mul: "\*";
-l_paren: "\(";
-r_paren: "\)";
-id: "[A-Za-z_][0-9A-Za-z_]*";
-`
-
- var gram *Grammar
- var automaton *lr0Automaton
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
-
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatalf("failed to create a LR0 automaton: %v", err)
- }
- if automaton == nil {
- t.Fatalf("genLR0Automaton returns nil without any error")
- }
- }
-
- initialState := automaton.states[automaton.initialState]
- if initialState == nil {
- t.Errorf("failed to get an initial status: %v", automaton.initialState)
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- genLR0Item("expr'", 0, "expr"),
- },
- 1: {
- genLR0Item("expr'", 1, "expr"),
- genLR0Item("expr", 1, "expr", "add", "term"),
- },
- 2: {
- genLR0Item("expr", 1, "term"),
- genLR0Item("term", 1, "term", "mul", "factor"),
- },
- 3: {
- genLR0Item("term", 1, "factor"),
- },
- 4: {
- genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
- },
- 5: {
- genLR0Item("factor", 1, "id"),
- },
- 6: {
- genLR0Item("expr", 2, "expr", "add", "term"),
- },
- 7: {
- genLR0Item("term", 2, "term", "mul", "factor"),
- },
- 8: {
- genLR0Item("expr", 1, "expr", "add", "term"),
- genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
- },
- 9: {
- genLR0Item("expr", 3, "expr", "add", "term"),
- genLR0Item("term", 1, "term", "mul", "factor"),
- },
- 10: {
- genLR0Item("term", 3, "term", "mul", "factor"),
- },
- 11: {
- genLR0Item("factor", 3, "l_paren", "expr", "r_paren"),
- },
- }
-
- expectedStates := []*expectedLRState{
- {
- kernelItems: expectedKernels[0],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("expr"): expectedKernels[1],
- genSym("term"): expectedKernels[2],
- genSym("factor"): expectedKernels[3],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[1],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("add"): expectedKernels[6],
- },
- reducibleProds: []*production{
- genProd("expr'", "expr"),
- },
- },
- {
- kernelItems: expectedKernels[2],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("mul"): expectedKernels[7],
- },
- reducibleProds: []*production{
- genProd("expr", "term"),
- },
- },
- {
- kernelItems: expectedKernels[3],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("term", "factor"),
- },
- },
- {
- kernelItems: expectedKernels[4],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("expr"): expectedKernels[8],
- genSym("term"): expectedKernels[2],
- genSym("factor"): expectedKernels[3],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[5],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("factor", "id"),
- },
- },
- {
- kernelItems: expectedKernels[6],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("term"): expectedKernels[9],
- genSym("factor"): expectedKernels[3],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[7],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("factor"): expectedKernels[10],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[8],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("add"): expectedKernels[6],
- genSym("r_paren"): expectedKernels[11],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[9],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("mul"): expectedKernels[7],
- },
- reducibleProds: []*production{
- genProd("expr", "expr", "add", "term"),
- },
- },
- {
- kernelItems: expectedKernels[10],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("term", "term", "mul", "factor"),
- },
- },
- {
- kernelItems: expectedKernels[11],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("factor", "l_paren", "expr", "r_paren"),
- },
- },
- }
-
- testLRAutomaton(t, expectedStates, automaton)
-}
-
-func TestLR0AutomatonContainingEmptyProduction(t *testing.T) {
- src := `
-#name test;
-
-s
- : foo bar
- ;
-foo
- :
- ;
-bar
- : b
- |
- ;
-
-b: "bar";
-`
-
- var gram *Grammar
- var automaton *lr0Automaton
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
-
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
-
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatalf("failed to create a LR0 automaton: %v", err)
- }
- if automaton == nil {
- t.Fatalf("genLR0Automaton returns nil without any error")
- }
- }
-
- initialState := automaton.states[automaton.initialState]
- if initialState == nil {
- t.Errorf("failed to get an initial status: %v", automaton.initialState)
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- genLR0Item("s'", 0, "s"),
- },
- 1: {
- genLR0Item("s'", 1, "s"),
- },
- 2: {
- genLR0Item("s", 1, "foo", "bar"),
- },
- 3: {
- genLR0Item("s", 2, "foo", "bar"),
- },
- 4: {
- genLR0Item("bar", 1, "b"),
- },
- }
-
- expectedStates := []*expectedLRState{
- {
- kernelItems: expectedKernels[0],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("s"): expectedKernels[1],
- genSym("foo"): expectedKernels[2],
- },
- reducibleProds: []*production{
- genProd("foo"),
- },
- emptyProdItems: []*lrItem{
- genLR0Item("foo", 0),
- },
- },
- {
- kernelItems: expectedKernels[1],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s'", "s"),
- },
- },
- {
- kernelItems: expectedKernels[2],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("bar"): expectedKernels[3],
- genSym("b"): expectedKernels[4],
- },
- reducibleProds: []*production{
- genProd("bar"),
- },
- emptyProdItems: []*lrItem{
- genLR0Item("bar", 0),
- },
- },
- {
- kernelItems: expectedKernels[3],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s", "foo", "bar"),
- },
- },
- {
- kernelItems: expectedKernels[4],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("bar", "b"),
- },
- },
- }
-
- testLRAutomaton(t, expectedStates, automaton)
-}
-
-func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) {
- if len(automaton.states) != len(expected) {
- t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states))
- }
-
- for i, eState := range expected {
- t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
- k, err := newKernel(eState.kernelItems)
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
-
- state, ok := automaton.states[k.id]
- if !ok {
- t.Fatalf("a kernel was not found: %v", k.id)
- }
-
- // test look-ahead symbols
- {
- if len(state.kernel.items) != len(eState.kernelItems) {
- t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items))
- }
- for _, eKItem := range eState.kernelItems {
- var kItem *lrItem
- for _, it := range state.kernel.items {
- if it.id != eKItem.id {
- continue
- }
- kItem = it
- break
- }
- if kItem == nil {
- t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id)
- }
-
- if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
- t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
- }
-
- for eSym := range eKItem.lookAhead.symbols {
- if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
- t.Errorf("look-ahead symbol not found: %v", eSym)
- }
- }
- }
- }
-
- // test next states
- {
- if len(state.next) != len(eState.nextStates) {
- t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next))
- }
- for eSym, eKItems := range eState.nextStates {
- nextStateKernel, err := newKernel(eKItems)
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
- nextState, ok := state.next[eSym]
- if !ok {
- t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
- }
- if nextState != nextStateKernel.id {
- t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
- }
- }
- }
-
- // test reducible productions
- {
- if len(state.reducible) != len(eState.reducibleProds) {
- t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
- }
- for _, eProd := range eState.reducibleProds {
- if _, ok := state.reducible[eProd.id]; !ok {
- t.Errorf("reducible production was not found: %v", eProd.id)
- }
- }
-
- if len(state.emptyProdItems) != len(eState.emptyProdItems) {
- t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
- }
- for _, eItem := range eState.emptyProdItems {
- found := false
- for _, item := range state.emptyProdItems {
- if item.id != eItem.id {
- continue
- }
- found = true
- break
- }
- if !found {
- t.Errorf("empty production item not found: %v", eItem.id)
- }
- }
- }
- })
- }
-}
diff --git a/tests/unit/grammar/parsing_table_test.go b/tests/unit/grammar/parsing_table_test.go
deleted file mode 100644
index 342e187..0000000
--- a/tests/unit/grammar/parsing_table_test.go
+++ /dev/null
@@ -1,387 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "urubu/grammar/symbol"
- "urubu/spec/grammar/parser"
-)
-
-type expectedState struct {
- kernelItems []*lrItem
- acts map[symbol.Symbol]testActionEntry
- goTos map[symbol.Symbol][]*lrItem
-}
-
-func TestGenLALRParsingTable(t *testing.T) {
- src := `
-#name test;
-
-s: l eq r | r;
-l: ref r | id;
-r: l;
-eq: '=';
-ref: '*';
-id: "[A-Za-z0-9_]+";
-`
-
- var ptab *ParsingTable
- var automaton *lalr1Automaton
- var gram *Grammar
- var nonTermCount int
- var termCount int
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
- first, err := genFirstSet(gram.productionSet)
- if err != nil {
- t.Fatal(err)
- }
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatal(err)
- }
- automaton, err = genLALR1Automaton(lr0, gram.productionSet, first)
- if err != nil {
- t.Fatal(err)
- }
-
- nonTermTexts, err := gram.symbolTable.NonTerminalTexts()
- if err != nil {
- t.Fatal(err)
- }
- termTexts, err := gram.symbolTable.TerminalTexts()
- if err != nil {
- t.Fatal(err)
- }
- nonTermCount = len(nonTermTexts)
- termCount = len(termTexts)
-
- lalr := &lrTableBuilder{
- automaton: automaton.lr0Automaton,
- prods: gram.productionSet,
- termCount: termCount,
- nonTermCount: nonTermCount,
- symTab: gram.symbolTable,
- }
- ptab, err = lalr.build()
- if err != nil {
- t.Fatalf("failed to create a LALR parsing table: %v", err)
- }
- if ptab == nil {
- t.Fatal("genLALRParsingTable returns nil without any error")
- }
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
- },
- 1: {
- withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
- },
- 2: {
- withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
- withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
- },
- 3: {
- withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
- },
- 4: {
- withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 5: {
- withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
- },
- 6: {
- withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
- },
- 7: {
- withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 8: {
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
- },
- 9: {
- withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
- },
- }
-
- expectedStates := []expectedState{
- {
- kernelItems: expectedKernels[0],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("ref"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[4],
- },
- genSym("id"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[5],
- },
- },
- goTos: map[symbol.Symbol][]*lrItem{
- genSym("s"): expectedKernels[1],
- genSym("l"): expectedKernels[2],
- genSym("r"): expectedKernels[3],
- },
- },
- {
- kernelItems: expectedKernels[1],
- acts: map[symbol.Symbol]testActionEntry{
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("s'", "s"),
- },
- },
- },
- {
- kernelItems: expectedKernels[2],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[6],
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("r", "l"),
- },
- },
- },
- {
- kernelItems: expectedKernels[3],
- acts: map[symbol.Symbol]testActionEntry{
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("s", "r"),
- },
- },
- },
- {
- kernelItems: expectedKernels[4],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("ref"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[4],
- },
- genSym("id"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[5],
- },
- },
- goTos: map[symbol.Symbol][]*lrItem{
- genSym("r"): expectedKernels[7],
- genSym("l"): expectedKernels[8],
- },
- },
- {
- kernelItems: expectedKernels[5],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeReduce,
- production: genProd("l", "id"),
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("l", "id"),
- },
- },
- },
- {
- kernelItems: expectedKernels[6],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("ref"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[4],
- },
- genSym("id"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[5],
- },
- },
- goTos: map[symbol.Symbol][]*lrItem{
- genSym("l"): expectedKernels[8],
- genSym("r"): expectedKernels[9],
- },
- },
- {
- kernelItems: expectedKernels[7],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeReduce,
- production: genProd("l", "ref", "r"),
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("l", "ref", "r"),
- },
- },
- },
- {
- kernelItems: expectedKernels[8],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeReduce,
- production: genProd("r", "l"),
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("r", "l"),
- },
- },
- },
- {
- kernelItems: expectedKernels[9],
- acts: map[symbol.Symbol]testActionEntry{
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("s", "l", "eq", "r"),
- },
- },
- },
- }
-
- t.Run("initial state", func(t *testing.T) {
- iniState := findStateByNum(automaton.states, ptab.InitialState)
- if iniState == nil {
- t.Fatalf("the initial state was not found: #%v", ptab.InitialState)
- }
- eIniState, err := newKernel(expectedKernels[0])
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
- if iniState.id != eIniState.id {
- t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id)
- }
- })
-
- for i, eState := range expectedStates {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- k, err := newKernel(eState.kernelItems)
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
- state, ok := automaton.states[k.id]
- if !ok {
- t.Fatalf("state was not found: #%v", 0)
- }
-
- testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount)
- testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount)
- })
- }
-}
-
-func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) {
- nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
- for eSym, eAct := range expectedState.acts {
- nonEmptyEntries[eSym.Num()] = struct{}{}
-
- ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num())
- if ty != eAct.ty {
- t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
- }
- switch eAct.ty {
- case ActionTypeShift:
- eNextState, err := newKernel(eAct.nextState)
- if err != nil {
- t.Fatal(err)
- }
- nextState := findStateByNum(automaton.states, stateNum)
- if nextState == nil {
- t.Fatalf("state was not found; state: #%v", stateNum)
- }
- if nextState.id != eNextState.id {
- t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
- }
- case ActionTypeReduce:
- prod := findProductionByNum(gram.productionSet, prodNum)
- if prod == nil {
- t.Fatalf("production was not found: #%v", prodNum)
- }
- if prod.id != eAct.production.id {
- t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
- }
- }
- }
- for symNum := 0; symNum < termCount; symNum++ {
- if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
- continue
- }
- ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum))
- if ty != ActionTypeError {
- t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum)
- }
- }
-}
-
-func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) {
- nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
- for eSym, eGoTo := range expectedState.goTos {
- nonEmptyEntries[eSym.Num()] = struct{}{}
-
- eNextState, err := newKernel(eGoTo)
- if err != nil {
- t.Fatal(err)
- }
- ty, stateNum := ptab.getGoTo(state.num, eSym.Num())
- if ty != GoToTypeRegistered {
- t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
- }
- nextState := findStateByNum(automaton.states, stateNum)
- if nextState == nil {
- t.Fatalf("state was not found: #%v", stateNum)
- }
- if nextState.id != eNextState.id {
- t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
- }
- }
- for symNum := 0; symNum < nonTermCount; symNum++ {
- if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
- continue
- }
- ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum))
- if ty != GoToTypeError {
- t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
- }
- }
-}
-
-type testActionEntry struct {
- ty ActionType
- nextState []*lrItem
- production *production
-}
-
-func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState {
- for _, state := range states {
- if state.num == num {
- return state
- }
- }
- return nil
-}
-
-func findProductionByNum(prods *productionSet, num productionNum) *production {
- for _, prod := range prods.getAllProductions() {
- if prod.num == num {
- return prod
- }
- }
- return nil
-}
diff --git a/tests/unit/grammar/symbol/symbol_test.go b/tests/unit/grammar/symbol.go
index 31c3edd..31c3edd 100644
--- a/tests/unit/grammar/symbol/symbol_test.go
+++ b/tests/unit/grammar/symbol.go
diff --git a/tests/unit/grammar/test_helper_test.go b/tests/unit/grammar/test_helper_test.go
deleted file mode 100644
index 546d2c1..0000000
--- a/tests/unit/grammar/test_helper_test.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package grammar
-
-import (
- "testing"
-
- "urubu/grammar/symbol"
-)
-
-type testSymbolGenerator func(text string) symbol.Symbol
-
-func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
- return func(text string) symbol.Symbol {
- t.Helper()
-
- sym, ok := symTab.ToSymbol(text)
- if !ok {
- t.Fatalf("symbol was not found: %v", text)
- }
- return sym
- }
-}
-
-type testProductionGenerator func(lhs string, rhs ...string) *production
-
-func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
- return func(lhs string, rhs ...string) *production {
- t.Helper()
-
- rhsSym := []symbol.Symbol{}
- for _, text := range rhs {
- rhsSym = append(rhsSym, genSym(text))
- }
- prod, err := newProduction(genSym(lhs), rhsSym)
- if err != nil {
- t.Fatalf("failed to create a production: %v", err)
- }
-
- return prod
- }
-}
-
-type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem
-
-func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator {
- return func(lhs string, dot int, rhs ...string) *lrItem {
- t.Helper()
-
- prod := genProd(lhs, rhs...)
- item, err := newLR0Item(prod, dot)
- if err != nil {
- t.Fatalf("failed to create a LR0 item: %v", err)
- }
-
- return item
- }
-}
-
-func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem {
- if item.lookAhead.symbols == nil {
- item.lookAhead.symbols = map[symbol.Symbol]struct{}{}
- }
-
- for _, a := range lookAhead {
- item.lookAhead.symbols[a] = struct{}{}
- }
-
- return item
-}
diff --git a/tests/unit/spec/grammar/parser/parser_test.go b/tests/unit/spec/grammar/parser.go
index 4161f6b..773c466 100644
--- a/tests/unit/spec/grammar/parser/parser_test.go
+++ b/tests/unit/spec/grammar/parser.go
@@ -7,6 +7,234 @@ import (
verr "urubu/error"
)
+func TestLexer_Run(t *testing.T) {
+ idTok := func(text string) *token {
+ return newIDToken(text, newPosition(1, 0))
+ }
+
+ termPatTok := func(text string) *token {
+ return newTerminalPatternToken(text, newPosition(1, 0))
+ }
+
+ strTok := func(text string) *token {
+ return newStringLiteralToken(text, newPosition(1, 0))
+ }
+
+ symTok := func(kind tokenKind) *token {
+ return newSymbolToken(kind, newPosition(1, 0))
+ }
+
+ invalidTok := func(text string) *token {
+ return newInvalidToken(text, newPosition(1, 0))
+ }
+
+ tests := []struct {
+ caption string
+ src string
+ tokens []*token
+ err error
+ }{
+ {
+ caption: "the lexer can recognize all kinds of tokens",
+ src: `id"terminal"'string':|;@...#$()`,
+ tokens: []*token{
+ idTok("id"),
+ termPatTok("terminal"),
+ strTok(`string`),
+ symTok(tokenKindColon),
+ symTok(tokenKindOr),
+ symTok(tokenKindSemicolon),
+ symTok(tokenKindLabelMarker),
+ symTok(tokenKindExpantion),
+ symTok(tokenKindDirectiveMarker),
+ symTok(tokenKindOrderedSymbolMarker),
+ symTok(tokenKindLParen),
+ symTok(tokenKindRParen),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "the lexer can recognize keywords",
+ src: `fragment`,
+ tokens: []*token{
+ symTok(tokenKindKWFragment),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "the lexer can recognize character sequences and escape sequences in a terminal",
+ src: `"abc\"\\"`,
+ tokens: []*token{
+ termPatTok(`abc"\\`),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "backslashes are recognized as they are because escape sequences are not allowed in strings",
+ src: `'\\\'`,
+ tokens: []*token{
+ strTok(`\\\`),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "a pattern must include at least one character",
+ src: `""`,
+ err: synErrEmptyPattern,
+ },
+ {
+ caption: "a string must include at least one character",
+ src: `''`,
+ err: synErrEmptyString,
+ },
+ {
+ caption: "the lexer can recognize newlines and combine consecutive newlines into one",
+ src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A",
+ tokens: []*token{
+ symTok(tokenKindNewline),
+ symTok(tokenKindOr),
+ symTok(tokenKindNewline),
+ symTok(tokenKindOr),
+ symTok(tokenKindNewline),
+ symTok(tokenKindOr),
+ symTok(tokenKindNewline),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "the lexer ignores line comments",
+ src: `
+// This is the first comment.
+foo
+// This is the second comment.
+// This is the third comment.
+bar // This is the fourth comment.
+`,
+ tokens: []*token{
+ symTok(tokenKindNewline),
+ idTok("foo"),
+ symTok(tokenKindNewline),
+ idTok("bar"),
+ symTok(tokenKindNewline),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "an identifier cannot contain the capital-case letters",
+ src: `Abc`,
+ err: synErrIDInvalidChar,
+ },
+ {
+ caption: "an identifier cannot contain the capital-case letters",
+ src: `Zyx`,
+ err: synErrIDInvalidChar,
+ },
+ {
+ caption: "the underscore cannot be placed at the beginning of an identifier",
+ src: `_abc`,
+ err: synErrIDInvalidUnderscorePos,
+ },
+ {
+ caption: "the underscore cannot be placed at the end of an identifier",
+ src: `abc_`,
+ err: synErrIDInvalidUnderscorePos,
+ },
+ {
+ caption: "the underscore cannot be placed consecutively",
+ src: `a__b`,
+ err: synErrIDConsecutiveUnderscores,
+ },
+ {
+ caption: "the digits cannot be placed at the biginning of an identifier",
+ src: `0abc`,
+ err: synErrIDInvalidDigitsPos,
+ },
+ {
+ caption: "the digits cannot be placed at the biginning of an identifier",
+ src: `9abc`,
+ err: synErrIDInvalidDigitsPos,
+ },
+ {
+ caption: "an unclosed terminal is not a valid token",
+ src: `"abc`,
+ err: synErrUnclosedTerminal,
+ },
+ {
+ caption: "an incompleted escape sequence in a pattern is not a valid token",
+ src: `"\`,
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "an unclosed string is not a valid token",
+ src: `'abc`,
+ err: synErrUnclosedString,
+ },
+ {
+ caption: "the lexer can recognize valid tokens following an invalid token",
+ src: `abc!!!def`,
+ tokens: []*token{
+ idTok("abc"),
+ invalidTok("!!!"),
+ idTok("def"),
+ newEOFToken(),
+ },
+ },
+ {
+ caption: "the lexer skips white spaces",
+ // \u0009: HT
+ // \u0020: SP
+ src: "a\u0009b\u0020c",
+ tokens: []*token{
+ idTok("a"),
+ idTok("b"),
+ idTok("c"),
+ newEOFToken(),
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ l, err := newLexer(strings.NewReader(tt.src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ n := 0
+ for {
+ var tok *token
+ tok, err = l.next()
+ if err != nil {
+ break
+ }
+ testToken(t, tok, tt.tokens[n])
+ n++
+ if tok.kind == tokenKindEOF {
+ break
+ }
+ }
+ if tt.err != nil {
+ synErr, ok := err.(*verr.SpecError)
+ if !ok {
+ t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
+ }
+ if tt.err != synErr.Cause {
+ t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause)
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
+ }
+ }
+ })
+ }
+}
+
+func testToken(t *testing.T, tok, expected *token) {
+ t.Helper()
+ if tok.kind != expected.kind || tok.text != expected.text {
+ t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok)
+ }
+}
+
func TestParse(t *testing.T) {
name := func(param *ParameterNode) *DirectiveNode {
return &DirectiveNode{
diff --git a/tests/unit/spec/grammar/parser/lexer_test.go b/tests/unit/spec/grammar/parser/lexer_test.go
deleted file mode 100644
index c402b42..0000000
--- a/tests/unit/spec/grammar/parser/lexer_test.go
+++ /dev/null
@@ -1,236 +0,0 @@
-package parser
-
-import (
- "strings"
- "testing"
-
- verr "urubu/error"
-)
-
-func TestLexer_Run(t *testing.T) {
- idTok := func(text string) *token {
- return newIDToken(text, newPosition(1, 0))
- }
-
- termPatTok := func(text string) *token {
- return newTerminalPatternToken(text, newPosition(1, 0))
- }
-
- strTok := func(text string) *token {
- return newStringLiteralToken(text, newPosition(1, 0))
- }
-
- symTok := func(kind tokenKind) *token {
- return newSymbolToken(kind, newPosition(1, 0))
- }
-
- invalidTok := func(text string) *token {
- return newInvalidToken(text, newPosition(1, 0))
- }
-
- tests := []struct {
- caption string
- src string
- tokens []*token
- err error
- }{
- {
- caption: "the lexer can recognize all kinds of tokens",
- src: `id"terminal"'string':|;@...#$()`,
- tokens: []*token{
- idTok("id"),
- termPatTok("terminal"),
- strTok(`string`),
- symTok(tokenKindColon),
- symTok(tokenKindOr),
- symTok(tokenKindSemicolon),
- symTok(tokenKindLabelMarker),
- symTok(tokenKindExpantion),
- symTok(tokenKindDirectiveMarker),
- symTok(tokenKindOrderedSymbolMarker),
- symTok(tokenKindLParen),
- symTok(tokenKindRParen),
- newEOFToken(),
- },
- },
- {
- caption: "the lexer can recognize keywords",
- src: `fragment`,
- tokens: []*token{
- symTok(tokenKindKWFragment),
- newEOFToken(),
- },
- },
- {
- caption: "the lexer can recognize character sequences and escape sequences in a terminal",
- src: `"abc\"\\"`,
- tokens: []*token{
- termPatTok(`abc"\\`),
- newEOFToken(),
- },
- },
- {
- caption: "backslashes are recognized as they are because escape sequences are not allowed in strings",
- src: `'\\\'`,
- tokens: []*token{
- strTok(`\\\`),
- newEOFToken(),
- },
- },
- {
- caption: "a pattern must include at least one character",
- src: `""`,
- err: synErrEmptyPattern,
- },
- {
- caption: "a string must include at least one character",
- src: `''`,
- err: synErrEmptyString,
- },
- {
- caption: "the lexer can recognize newlines and combine consecutive newlines into one",
- src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A",
- tokens: []*token{
- symTok(tokenKindNewline),
- symTok(tokenKindOr),
- symTok(tokenKindNewline),
- symTok(tokenKindOr),
- symTok(tokenKindNewline),
- symTok(tokenKindOr),
- symTok(tokenKindNewline),
- newEOFToken(),
- },
- },
- {
- caption: "the lexer ignores line comments",
- src: `
-// This is the first comment.
-foo
-// This is the second comment.
-// This is the third comment.
-bar // This is the fourth comment.
-`,
- tokens: []*token{
- symTok(tokenKindNewline),
- idTok("foo"),
- symTok(tokenKindNewline),
- idTok("bar"),
- symTok(tokenKindNewline),
- newEOFToken(),
- },
- },
- {
- caption: "an identifier cannot contain the capital-case letters",
- src: `Abc`,
- err: synErrIDInvalidChar,
- },
- {
- caption: "an identifier cannot contain the capital-case letters",
- src: `Zyx`,
- err: synErrIDInvalidChar,
- },
- {
- caption: "the underscore cannot be placed at the beginning of an identifier",
- src: `_abc`,
- err: synErrIDInvalidUnderscorePos,
- },
- {
- caption: "the underscore cannot be placed at the end of an identifier",
- src: `abc_`,
- err: synErrIDInvalidUnderscorePos,
- },
- {
- caption: "the underscore cannot be placed consecutively",
- src: `a__b`,
- err: synErrIDConsecutiveUnderscores,
- },
- {
- caption: "the digits cannot be placed at the biginning of an identifier",
- src: `0abc`,
- err: synErrIDInvalidDigitsPos,
- },
- {
- caption: "the digits cannot be placed at the biginning of an identifier",
- src: `9abc`,
- err: synErrIDInvalidDigitsPos,
- },
- {
- caption: "an unclosed terminal is not a valid token",
- src: `"abc`,
- err: synErrUnclosedTerminal,
- },
- {
- caption: "an incompleted escape sequence in a pattern is not a valid token",
- src: `"\`,
- err: synErrIncompletedEscSeq,
- },
- {
- caption: "an unclosed string is not a valid token",
- src: `'abc`,
- err: synErrUnclosedString,
- },
- {
- caption: "the lexer can recognize valid tokens following an invalid token",
- src: `abc!!!def`,
- tokens: []*token{
- idTok("abc"),
- invalidTok("!!!"),
- idTok("def"),
- newEOFToken(),
- },
- },
- {
- caption: "the lexer skips white spaces",
- // \u0009: HT
- // \u0020: SP
- src: "a\u0009b\u0020c",
- tokens: []*token{
- idTok("a"),
- idTok("b"),
- idTok("c"),
- newEOFToken(),
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- l, err := newLexer(strings.NewReader(tt.src))
- if err != nil {
- t.Fatal(err)
- }
- n := 0
- for {
- var tok *token
- tok, err = l.next()
- if err != nil {
- break
- }
- testToken(t, tok, tt.tokens[n])
- n++
- if tok.kind == tokenKindEOF {
- break
- }
- }
- if tt.err != nil {
- synErr, ok := err.(*verr.SpecError)
- if !ok {
- t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
- }
- if tt.err != synErr.Cause {
- t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause)
- }
- } else {
- if err != nil {
- t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
- }
- }
- })
- }
-}
-
-func testToken(t *testing.T, tok, expected *token) {
- t.Helper()
- if tok.kind != expected.kind || tok.text != expected.text {
- t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok)
- }
-}
diff --git a/tests/unit/spec/test/parser_test.go b/tests/unit/spec/test.go
index eddba92..eddba92 100644
--- a/tests/unit/spec/test/parser_test.go
+++ b/tests/unit/spec/test.go
diff --git a/tests/unit/tester/tester_test.go b/tests/unit/tester.go
index 3c6b1db..3c6b1db 100644
--- a/tests/unit/tester/tester_test.go
+++ b/tests/unit/tester.go
diff --git a/tests/unit/utf8/utf8_test.go b/tests/unit/utf8.go
index 2dc8093..2dc8093 100644
--- a/tests/unit/utf8/utf8_test.go
+++ b/tests/unit/utf8.go