| author | EuAndreh <eu@euandre.org> | 2024-12-11 16:48:12 -0300 |
| committer | EuAndreh <eu@euandre.org> | 2024-12-11 16:48:12 -0300 |
| commit | 27b4729bd1a57740ea68e774d58d9cb3f45c5589 (patch) |
| tree | 152ff5686ade087e29e102cbbd39c0405cb63c02 /tests |
| parent | Consolidate packages spread across multiple files into single one (diff) |
| download | cotia-27b4729bd1a57740ea68e774d58d9cb3f45c5589.tar.gz, cotia-27b4729bd1a57740ea68e774d58d9cb3f45c5589.tar.xz |
Do the same single file consolidation on tests
Diffstat (limited to 'tests')
-rw-r--r-- | tests/unit/compressor.go (renamed from tests/unit/compressor/compressor_test.go) | 0
-rw-r--r-- | tests/unit/driver/lexer.go (renamed from tests/unit/driver/lexer/lexer_test.go) | 0
-rw-r--r-- | tests/unit/driver/parser.go | 1972
-rw-r--r-- | tests/unit/driver/parser/conflict_test.go | 524
-rw-r--r-- | tests/unit/driver/parser/lac_test.go | 120
-rw-r--r-- | tests/unit/driver/parser/parser_test.go | 833
-rw-r--r-- | tests/unit/driver/parser/semantic_action_test.go | 227
-rw-r--r-- | tests/unit/driver/parser/syntax_error_test.go | 306
-rw-r--r-- | tests/unit/grammar.go (renamed from tests/unit/grammar/grammar_test.go) | 1266
-rw-r--r-- | tests/unit/grammar/first_test.go | 219
-rw-r--r-- | tests/unit/grammar/lalr1_test.go | 187
-rw-r--r-- | tests/unit/grammar/lexical.go (renamed from tests/unit/grammar/lexical/compiler_test.go) | 0
-rw-r--r-- | tests/unit/grammar/lexical/dfa.go (renamed from tests/unit/grammar/lexical/dfa/tree_test.go) | 185
-rw-r--r-- | tests/unit/grammar/lexical/dfa/dfa_test.go | 121
-rw-r--r-- | tests/unit/grammar/lexical/dfa/symbol_position_test.go | 79
-rw-r--r-- | tests/unit/grammar/lexical/parser.go (renamed from tests/unit/grammar/lexical/parser/parser_test.go) | 518
-rw-r--r-- | tests/unit/grammar/lexical/parser/lexer_test.go | 524
-rw-r--r-- | tests/unit/grammar/lr0_test.go | 448
-rw-r--r-- | tests/unit/grammar/parsing_table_test.go | 387
-rw-r--r-- | tests/unit/grammar/symbol.go (renamed from tests/unit/grammar/symbol/symbol_test.go) | 0
-rw-r--r-- | tests/unit/grammar/test_helper_test.go | 68
-rw-r--r-- | tests/unit/spec/grammar/parser.go (renamed from tests/unit/spec/grammar/parser/parser_test.go) | 228
-rw-r--r-- | tests/unit/spec/grammar/parser/lexer_test.go | 236
-rw-r--r-- | tests/unit/spec/test.go (renamed from tests/unit/spec/test/parser_test.go) | 0
-rw-r--r-- | tests/unit/tester.go (renamed from tests/unit/tester/tester_test.go) | 0
-rw-r--r-- | tests/unit/utf8.go (renamed from tests/unit/utf8/utf8_test.go) | 0
26 files changed, 4169 insertions, 4279 deletions
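Every test case this commit moves drives the same pipeline: parse the grammar spec, compile it, lex the input, run the LR driver, then compare the resulting tree. The sketch below condenses that sequence from the test bodies in the diff that follows; `runCase` is a name introduced here for illustration only — the tests inline this exact sequence rather than calling a helper.

```go
package parser

import (
	"strings"
	"testing"

	"urubu/grammar"
	"urubu/spec/grammar/parser"
)

// runCase is an illustrative wrapper (not in the repository) around the
// five steps every table-driven test in the consolidated file performs.
func runCase(t *testing.T, specSrc, src string, want *Node) {
	t.Helper()

	// 1. Parse the grammar spec DSL.
	ast, err := parser.Parse(strings.NewReader(specSrc))
	if err != nil {
		t.Fatal(err)
	}

	// 2. Compile the AST into a grammar with parsing tables.
	b := grammar.GrammarBuilder{AST: ast}
	cg, _, err := b.Build()
	if err != nil {
		t.Fatal(err)
	}

	// 3. Lex the input against the compiled grammar.
	toks, err := NewTokenStream(cg, strings.NewReader(src))
	if err != nil {
		t.Fatal(err)
	}

	// 4. Run the driver, recording a CST via the semantic-action hook.
	gram := NewGrammar(cg)
	tb := NewDefaultSyntaxTreeBuilder()
	p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
	if err != nil {
		t.Fatal(err)
	}
	if err := p.Parse(); err != nil {
		t.Fatal(err)
	}

	// 5. Compare the built tree against the expected one, node by node.
	if want != nil {
		testTree(t, tb.Tree(), want)
	}
}
```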
diff --git a/tests/unit/compressor/compressor_test.go b/tests/unit/compressor.go
index 621b731..621b731 100644
--- a/tests/unit/compressor/compressor_test.go
+++ b/tests/unit/compressor.go
diff --git a/tests/unit/driver/lexer/lexer_test.go b/tests/unit/driver/lexer.go
index a3d0231..a3d0231 100644
--- a/tests/unit/driver/lexer/lexer_test.go
+++ b/tests/unit/driver/lexer.go
diff --git a/tests/unit/driver/parser.go b/tests/unit/driver/parser.go
new file mode 100644
index 0000000..31fec45
--- /dev/null
+++ b/tests/unit/driver/parser.go
@@ -0,0 +1,1972 @@
+package parser
+
+import (
+	"fmt"
+	"sort"
+	"strings"
+	"testing"
+
+	"urubu/grammar"
+	spec "urubu/spec/grammar"
+	"urubu/spec/grammar/parser"
+)
[1,960 further added lines: TestParserWithConflicts, TestParserWithLAC, TestParser_Parse, TestParserWithSemanticAction, TestParserWithSyntaxErrors, and TestParserWithSyntaxErrorAndExpectedLookahead, plus the shared helpers termNode, errorNode, nonTermNode, testTree, and testSemAct — the bodies of the five deleted files below, concatenated into a single `package parser` file with one merged import block.]
diff --git a/tests/unit/driver/parser/conflict_test.go b/tests/unit/driver/parser/conflict_test.go
deleted file mode 100644
index 0bc14d4..0000000
--- a/tests/unit/driver/parser/conflict_test.go
+++ /dev/null
@@ -1,524 +0,0 @@
[524 deleted lines: TestParserWithConflicts, moved verbatim into tests/unit/driver/parser.go.]
diff --git a/tests/unit/driver/parser/lac_test.go b/tests/unit/driver/parser/lac_test.go
deleted file mode 100644
index c2368e8..0000000
--- a/tests/unit/driver/parser/lac_test.go
+++ /dev/null
@@ -1,120 +0,0 @@
[120 deleted lines: TestParserWithLAC, moved verbatim into tests/unit/driver/parser.go.]
diff --git a/tests/unit/driver/parser/parser_test.go b/tests/unit/driver/parser/parser_test.go
deleted file mode 100644
index bca0391..0000000
--- a/tests/unit/driver/parser/parser_test.go
+++ /dev/null
@@ -1,833 +0,0 @@
[833 deleted lines: TestParser_Parse, testTree, and the node-construction helpers, moved verbatim into tests/unit/driver/parser.go.]
diff --git a/tests/unit/driver/parser/semantic_action_test.go b/tests/unit/driver/parser/semantic_action_test.go
deleted file mode 100644
index cb3ee70..0000000
--- a/tests/unit/driver/parser/semantic_action_test.go
+++ /dev/null
@@ -1,227 +0,0 @@
[227 deleted lines: testSemAct and TestParserWithSemanticAction, moved verbatim into tests/unit/driver/parser.go.]
diff --git a/tests/unit/driver/parser/syntax_error_test.go b/tests/unit/driver/parser/syntax_error_test.go
deleted file mode 100644
index 90e5bd2..0000000
--- a/tests/unit/driver/parser/syntax_error_test.go
+++ /dev/null
@@ -1,306 +0,0 @@
[306 deleted lines: TestParserWithSyntaxErrors and TestParserWithSyntaxErrorAndExpectedLookahead, moved verbatim into tests/unit/driver/parser.go.]
state reading the first invalid symbol ('!'), - // the second and third invalid symbols ('!') are ignored. - src: `! ! !; a!; ab!;`, - synErrCount: 3, - }, - { - caption: "when the parser performs shift three times, the parser recovers from the error state", - specSrc: ` -#name test; - -seq - : seq elem semi_colon - | elem semi_colon - | error star star semi_colon - ; -elem - : a b c - ; - -ws #skip - : "[\u{0009}\u{0020}]+"; -semi_colon - : ';'; -star - : '*'; -a - : 'a'; -b - : 'b'; -c - : 'c'; -`, - src: `!**; a!**; ab!**; abc!`, - synErrCount: 4, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - gram, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - p, err := NewParser(toks, NewGrammar(gram)) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - synErrs := p.SyntaxErrors() - if len(synErrs) != tt.synErrCount { - t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs)) - } - }) - } -} - -func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) { - tests := []struct { - caption string - specSrc string - src string - cause string - expected []string - }{ - { - caption: "the parser reports an expected lookahead symbol", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -`, - src: `bar`, - cause: `bar`, - expected: []string{ - "foo", - }, - }, - { - caption: "the parser reports expected lookahead symbols", - specSrc: ` -#name test; - -s - : foo - | bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - src: `baz`, - cause: `baz`, - expected: []string{ - "foo", - "bar", - }, - }, - { - caption: "the parser may report the EOF as an expected lookahead symbol", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -`, - src: `foobar`, - cause: `bar`, - expected: []string{ - "<eof>", - }, - }, - { - caption: "the parser may report the EOF and others as expected lookahead symbols", - specSrc: ` -#name test; - -s - : foo - | - ; - -foo - : 'foo'; -`, - src: `bar`, - cause: `bar`, - expected: []string{ - "foo", - "<eof>", - }, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(tt.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := grammar.GrammarBuilder{ - AST: ast, - } - gram, _, err := b.Build() - if err != nil { - t.Fatal(err) - } - - toks, err := NewTokenStream(gram, strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - - p, err := NewParser(toks, NewGrammar(gram)) - if err != nil { - t.Fatal(err) - } - - err = p.Parse() - if err != nil { - t.Fatal(err) - } - - synErrs := p.SyntaxErrors() - if synErrs == nil { - t.Fatalf("expected one syntax error, but it didn't occur") - } - if len(synErrs) != 1 { - t.Fatalf("too many syntax errors: %v errors", len(synErrs)) - } - synErr := synErrs[0] - if string(synErr.Token.Lexeme()) != tt.cause { - t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme())) - } - if len(synErr.ExpectedTerminals) != len(tt.expected) { - t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals) - } - sort.Slice(tt.expected, func(i, j int) bool { - return tt.expected[i] < tt.expected[j] - }) - 
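-			// The actual terminals are sorted with the same ordering so the
-			// element-by-element comparison below is order-insensitive.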
sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool { - return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j] - }) - for i, e := range tt.expected { - if synErr.ExpectedTerminals[i] != e { - t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i]) - } - } - }) - } -} diff --git a/tests/unit/grammar/grammar_test.go b/tests/unit/grammar.go index ddedb27..3743b23 100644 --- a/tests/unit/grammar/grammar_test.go +++ b/tests/unit/grammar.go @@ -1,13 +1,225 @@ package grammar import ( + "fmt" "strings" "testing" verr "urubu/error" + "urubu/grammar/symbol" "urubu/spec/grammar/parser" ) +type first struct { + lhs string + num int + dot int + symbols []string + empty bool +} + +func TestGenFirst(t *testing.T) { + tests := []struct { + caption string + src string + first []first + }{ + { + caption: "productions contain only non-empty productions", + src: ` +#name test; + +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +`, + first: []first{ + {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, + {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, + {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, + {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, + {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, + {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, + {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, + {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, + {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, + }, + }, + { + caption: "productions contain the empty start production", + src: ` +#name test; + +s + : + ; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "productions contain an empty production", + src: ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar: "bar"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, + {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "a start production contains a non-empty alternative and empty alternative", + src: ` +#name test; + +s + : foo + | + ; +foo: "foo"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, + {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, + }, + }, + { + caption: "a production contains non-empty alternative and empty alternative", + src: ` +#name test; + +s + : foo + ; +foo + : bar + | + ; +bar: "bar"; +`, + first: []first{ + {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, + {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, + {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, + {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + 
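+			// For each expected entry, the test resolves the LHS symbol, looks up
+			// the production, and compares the FIRST set computed at position
+			// `dot` against the expected terminals. For example, the entry for
+			// `expr → expr . add term` (num: 0, dot: 1) asserts that the set is
+			// {add}, and `empty: true` asserts that the set contains ε.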
fst, gram := genActualFirst(t, tt.src)
+
+			for _, ttFirst := range tt.first {
+				lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
+				if !ok {
+					t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
+				}
+
+				prod, ok := gram.productionSet.findByLHS(lhsSym)
+				if !ok {
+					t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
+				}
+
+				actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
+				if err != nil {
+					t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
+				}
+
+				expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
+
+				testFirst(t, actualFirst, expectedFirst)
+			}
+		})
+	}
+}
+
+func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
+	ast, err := parser.Parse(strings.NewReader(src))
+	if err != nil {
+		t.Fatal(err)
+	}
+	b := GrammarBuilder{
+		AST: ast,
+	}
+	gram, err := b.build()
+	if err != nil {
+		t.Fatal(err)
+	}
+	fst, err := genFirstSet(gram.productionSet)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if fst == nil {
+		t.Fatal("genFirstSet returned nil without any error")
+	}
+
+	return fst, gram
+}
+
+func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
+	t.Helper()
+
+	entry := newFirstEntry()
+	if empty {
+		entry.addEmpty()
+	}
+	for _, sym := range symbols {
+		symSym, ok := symTab.ToSymbol(sym)
+		if !ok {
+			t.Fatalf("a symbol was not found; symbol: %v", sym)
+		}
+		entry.add(symSym)
+	}
+
+	return entry
+}
+
+func testFirst(t *testing.T, actual, expected *firstEntry) {
+	if actual.empty != expected.empty {
+		t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
+	}
+
+	if len(actual.symbols) != len(expected.symbols) {
+		t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+	}
+
+	for eSym := range expected.symbols {
+		if _, ok := actual.symbols[eSym]; !ok {
+			t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
+		}
+	}
+}
+
 func TestGrammarBuilderOK(t *testing.T) {
 	type okTest struct {
 		caption string
@@ -3379,3 +3591,1057 @@ bar
 		})
 	}
 }
+
+func TestGenLALR1Automaton(t *testing.T) {
+	// This grammar belongs to the LALR(1) class, not SLR(1).
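+	// In the state whose kernel is {s → l・eq r, r → l・}, SLR(1) would reduce
+	// by r → l on every terminal in FOLLOW(r). eq is in FOLLOW(r): eq follows
+	// l in s → l eq r, and FOLLOW(l) ⊆ FOLLOW(r) because r is rightmost in
+	// l → ref r. So SLR(1) has a shift/reduce conflict on eq there. LALR(1)
+	// computes the lookahead of r → l・ in that state as {<eof>} only (see
+	// expectedKernels[2] below), and the conflict disappears.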
+ src := ` +#name test; + +s: l eq r | r; +l: ref r | id; +r: l; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var gram *Grammar + var automaton *lalr1Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatalf("failed to create a FIRST set: %v", err) + } + + automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) + if err != nil { + t.Fatalf("failed to create a LALR1 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLALR1Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), + }, + 1: { + withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), + }, + 2: { + withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), + withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), + }, + 3: { + withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), + }, + 4: { + withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 5: { + withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), + }, + 6: { + withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), + }, + 7: { + withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 8: { + withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), + }, + 9: { + withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("l"): expectedKernels[2], + genSym("r"): expectedKernels[3], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s'", "s"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("eq"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("r", "l"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "r"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[7], + genSym("l"): expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("l", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("r"): expectedKernels[9], + genSym("l"): 
expectedKernels[8], + genSym("ref"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("l", "ref", "r"), + }, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("r", "l"), + }, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("s", "l", "eq", "r"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton.lr0Automaton) +} + +type expectedLRState struct { + kernelItems []*lrItem + nextStates map[symbol.Symbol][]*lrItem + reducibleProds []*production + emptyProdItems []*lrItem +} + +func TestGenLR0Automaton(t *testing.T) { + src := ` +#name test; + +expr + : expr add term + | term + ; +term + : term mul factor + | factor + ; +factor + : l_paren expr r_paren + | id + ; +add: "\+"; +mul: "\*"; +l_paren: "\("; +r_paren: "\)"; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + + var gram *Grammar + var automaton *lr0Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("expr'", 0, "expr"), + }, + 1: { + genLR0Item("expr'", 1, "expr"), + genLR0Item("expr", 1, "expr", "add", "term"), + }, + 2: { + genLR0Item("expr", 1, "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 3: { + genLR0Item("term", 1, "factor"), + }, + 4: { + genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), + }, + 5: { + genLR0Item("factor", 1, "id"), + }, + 6: { + genLR0Item("expr", 2, "expr", "add", "term"), + }, + 7: { + genLR0Item("term", 2, "term", "mul", "factor"), + }, + 8: { + genLR0Item("expr", 1, "expr", "add", "term"), + genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), + }, + 9: { + genLR0Item("expr", 3, "expr", "add", "term"), + genLR0Item("term", 1, "term", "mul", "factor"), + }, + 10: { + genLR0Item("term", 3, "term", "mul", "factor"), + }, + 11: { + genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("expr"): expectedKernels[1], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[1], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + }, + reducibleProds: []*production{ + genProd("expr'", "expr"), + }, + }, + { + kernelItems: expectedKernels[2], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], 
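+				// Note that this state both shifts on mul and can reduce by
+				// expr → term. The LR(0) automaton records the two possibilities
+				// side by side; the shift/reduce decision is made later, when
+				// the parsing table is built.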
+ }, + reducibleProds: []*production{ + genProd("expr", "term"), + }, + }, + { + kernelItems: expectedKernels[3], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "factor"), + }, + }, + { + kernelItems: expectedKernels[4], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("expr"): expectedKernels[8], + genSym("term"): expectedKernels[2], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[5], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "id"), + }, + }, + { + kernelItems: expectedKernels[6], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("term"): expectedKernels[9], + genSym("factor"): expectedKernels[3], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[7], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("factor"): expectedKernels[10], + genSym("l_paren"): expectedKernels[4], + genSym("id"): expectedKernels[5], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[8], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("add"): expectedKernels[6], + genSym("r_paren"): expectedKernels[11], + }, + reducibleProds: []*production{}, + }, + { + kernelItems: expectedKernels[9], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("mul"): expectedKernels[7], + }, + reducibleProds: []*production{ + genProd("expr", "expr", "add", "term"), + }, + }, + { + kernelItems: expectedKernels[10], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("term", "term", "mul", "factor"), + }, + }, + { + kernelItems: expectedKernels[11], + nextStates: map[symbol.Symbol][]*lrItem{}, + reducibleProds: []*production{ + genProd("factor", "l_paren", "expr", "r_paren"), + }, + }, + } + + testLRAutomaton(t, expectedStates, automaton) +} + +func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { + src := ` +#name test; + +s + : foo bar + ; +foo + : + ; +bar + : b + | + ; + +b: "bar"; +` + + var gram *Grammar + var automaton *lr0Automaton + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + + automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatalf("failed to create a LR0 automaton: %v", err) + } + if automaton == nil { + t.Fatalf("genLR0Automaton returns nil without any error") + } + } + + initialState := automaton.states[automaton.initialState] + if initialState == nil { + t.Errorf("failed to get an initial status: %v", automaton.initialState) + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + genLR0Item("s'", 0, "s"), + }, + 1: { + genLR0Item("s'", 1, "s"), + }, + 2: { + genLR0Item("s", 1, "foo", "bar"), + }, + 3: { + genLR0Item("s", 2, "foo", "bar"), + }, + 4: { + genLR0Item("bar", 1, "b"), + }, + } + + expectedStates := []*expectedLRState{ + { + kernelItems: expectedKernels[0], + nextStates: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("foo"): expectedKernels[2], + }, + reducibleProds: 
[]*production{
+				genProd("foo"),
+			},
+			emptyProdItems: []*lrItem{
+				genLR0Item("foo", 0),
+			},
+		},
+		{
+			kernelItems: expectedKernels[1],
+			nextStates:  map[symbol.Symbol][]*lrItem{},
+			reducibleProds: []*production{
+				genProd("s'", "s"),
+			},
+		},
+		{
+			kernelItems: expectedKernels[2],
+			nextStates: map[symbol.Symbol][]*lrItem{
+				genSym("bar"): expectedKernels[3],
+				genSym("b"):   expectedKernels[4],
+			},
+			reducibleProds: []*production{
+				genProd("bar"),
+			},
+			emptyProdItems: []*lrItem{
+				genLR0Item("bar", 0),
+			},
+		},
+		{
+			kernelItems: expectedKernels[3],
+			nextStates:  map[symbol.Symbol][]*lrItem{},
+			reducibleProds: []*production{
+				genProd("s", "foo", "bar"),
+			},
+		},
+		{
+			kernelItems: expectedKernels[4],
+			nextStates:  map[symbol.Symbol][]*lrItem{},
+			reducibleProds: []*production{
+				genProd("bar", "b"),
+			},
+		},
+	}
+
+	testLRAutomaton(t, expectedStates, automaton)
+}
+
+func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) {
+	if len(automaton.states) != len(expected) {
+		t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states))
+	}
+
+	for i, eState := range expected {
+		t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
+			k, err := newKernel(eState.kernelItems)
+			if err != nil {
+				t.Fatalf("failed to create a kernel item: %v", err)
+			}
+
+			state, ok := automaton.states[k.id]
+			if !ok {
+				t.Fatalf("a kernel was not found: %v", k.id)
+			}
+
+			// test look-ahead symbols
+			{
+				if len(state.kernel.items) != len(eState.kernelItems) {
+					t.Errorf("kernel item count is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items))
+				}
+				for _, eKItem := range eState.kernelItems {
+					var kItem *lrItem
+					for _, it := range state.kernel.items {
+						if it.id != eKItem.id {
+							continue
+						}
+						kItem = it
+						break
+					}
+					if kItem == nil {
+						t.Fatalf("kernel item not found: %v", eKItem.id)
+					}
+
+					if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
+						t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
+					}
+
+					for eSym := range eKItem.lookAhead.symbols {
+						if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
+							t.Errorf("look-ahead symbol not found: %v", eSym)
+						}
+					}
+				}
+			}
+
+			// test next states
+			{
+				if len(state.next) != len(eState.nextStates) {
+					t.Errorf("next state count is mismatched; want: %v, got: %v", len(eState.nextStates), len(state.next))
+				}
+				for eSym, eKItems := range eState.nextStates {
+					nextStateKernel, err := newKernel(eKItems)
+					if err != nil {
+						t.Fatalf("failed to create a kernel item: %v", err)
+					}
+					nextState, ok := state.next[eSym]
+					if !ok {
+						t.Fatalf("next state was not found; state: %v, symbol: %v", state.id, eSym)
+					}
+					if nextState != nextStateKernel.id {
+						t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
+					}
+				}
+			}
+
+			// test reducible productions
+			{
+				if len(state.reducible) != len(eState.reducibleProds) {
+					t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
+				}
+				for _, eProd := range eState.reducibleProds {
+					if _, ok := state.reducible[eProd.id]; !ok {
+						t.Errorf("reducible production was not found: %v", eProd.id)
+					}
+				}
+
+				if len(state.emptyProdItems) != len(eState.emptyProdItems) {
+					t.Errorf("empty production item count is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
+				}
+
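+				// Each expected empty-production item is matched by its item ID,
+				// so the order of the items is not significant.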
for _, eItem := range eState.emptyProdItems { + found := false + for _, item := range state.emptyProdItems { + if item.id != eItem.id { + continue + } + found = true + break + } + if !found { + t.Errorf("empty production item not found: %v", eItem.id) + } + } + } + }) + } +} + +type expectedState struct { + kernelItems []*lrItem + acts map[symbol.Symbol]testActionEntry + goTos map[symbol.Symbol][]*lrItem +} + +func TestGenLALRParsingTable(t *testing.T) { + src := ` +#name test; + +s: l eq r | r; +l: ref r | id; +r: l; +eq: '='; +ref: '*'; +id: "[A-Za-z0-9_]+"; +` + + var ptab *ParsingTable + var automaton *lalr1Automaton + var gram *Grammar + var nonTermCount int + var termCount int + { + ast, err := parser.Parse(strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + b := GrammarBuilder{ + AST: ast, + } + gram, err = b.build() + if err != nil { + t.Fatal(err) + } + first, err := genFirstSet(gram.productionSet) + if err != nil { + t.Fatal(err) + } + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + t.Fatal(err) + } + automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) + if err != nil { + t.Fatal(err) + } + + nonTermTexts, err := gram.symbolTable.NonTerminalTexts() + if err != nil { + t.Fatal(err) + } + termTexts, err := gram.symbolTable.TerminalTexts() + if err != nil { + t.Fatal(err) + } + nonTermCount = len(nonTermTexts) + termCount = len(termTexts) + + lalr := &lrTableBuilder{ + automaton: automaton.lr0Automaton, + prods: gram.productionSet, + termCount: termCount, + nonTermCount: nonTermCount, + symTab: gram.symbolTable, + } + ptab, err = lalr.build() + if err != nil { + t.Fatalf("failed to create a LALR parsing table: %v", err) + } + if ptab == nil { + t.Fatal("genLALRParsingTable returns nil without any error") + } + } + + genSym := newTestSymbolGenerator(t, gram.symbolTable) + genProd := newTestProductionGenerator(t, genSym) + genLR0Item := newTestLR0ItemGenerator(t, genProd) + + expectedKernels := map[int][]*lrItem{ + 0: { + withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), + }, + 1: { + withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), + }, + 2: { + withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), + withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), + }, + 3: { + withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), + }, + 4: { + withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 5: { + withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), + }, + 6: { + withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), + }, + 7: { + withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), + }, + 8: { + withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), + }, + 9: { + withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), + }, + } + + expectedStates := []expectedState{ + { + kernelItems: expectedKernels[0], + acts: map[symbol.Symbol]testActionEntry{ + genSym("ref"): { + ty: ActionTypeShift, + nextState: expectedKernels[4], + }, + genSym("id"): { + ty: ActionTypeShift, + nextState: expectedKernels[5], + }, + }, + goTos: map[symbol.Symbol][]*lrItem{ + genSym("s"): expectedKernels[1], + genSym("l"): expectedKernels[2], + genSym("r"): expectedKernels[3], + }, + }, + { + kernelItems: expectedKernels[1], + acts: map[symbol.Symbol]testActionEntry{ + symbol.SymbolEOF: { + ty: ActionTypeReduce, + production: genProd("s'", "s"), + }, + }, + }, + { + 
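+			// This is the state that distinguishes LALR(1) from SLR(1) for this
+			// grammar: its kernel is {s → l・eq r, r → l・}, and the table must
+			// shift on eq while reducing by r → l only on <eof>.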
kernelItems: expectedKernels[2],
+			acts: map[symbol.Symbol]testActionEntry{
+				genSym("eq"): {
+					ty:        ActionTypeShift,
+					nextState: expectedKernels[6],
+				},
+				symbol.SymbolEOF: {
+					ty:         ActionTypeReduce,
+					production: genProd("r", "l"),
+				},
+			},
+		},
+		{
+			kernelItems: expectedKernels[3],
+			acts: map[symbol.Symbol]testActionEntry{
+				symbol.SymbolEOF: {
+					ty:         ActionTypeReduce,
+					production: genProd("s", "r"),
+				},
+			},
+		},
+		{
+			kernelItems: expectedKernels[4],
+			acts: map[symbol.Symbol]testActionEntry{
+				genSym("ref"): {
+					ty:        ActionTypeShift,
+					nextState: expectedKernels[4],
+				},
+				genSym("id"): {
+					ty:        ActionTypeShift,
+					nextState: expectedKernels[5],
+				},
+			},
+			goTos: map[symbol.Symbol][]*lrItem{
+				genSym("r"): expectedKernels[7],
+				genSym("l"): expectedKernels[8],
+			},
+		},
+		{
+			kernelItems: expectedKernels[5],
+			acts: map[symbol.Symbol]testActionEntry{
+				genSym("eq"): {
+					ty:         ActionTypeReduce,
+					production: genProd("l", "id"),
+				},
+				symbol.SymbolEOF: {
+					ty:         ActionTypeReduce,
+					production: genProd("l", "id"),
+				},
+			},
+		},
+		{
+			kernelItems: expectedKernels[6],
+			acts: map[symbol.Symbol]testActionEntry{
+				genSym("ref"): {
+					ty:        ActionTypeShift,
+					nextState: expectedKernels[4],
+				},
+				genSym("id"): {
+					ty:        ActionTypeShift,
+					nextState: expectedKernels[5],
+				},
+			},
+			goTos: map[symbol.Symbol][]*lrItem{
+				genSym("l"): expectedKernels[8],
+				genSym("r"): expectedKernels[9],
+			},
+		},
+		{
+			kernelItems: expectedKernels[7],
+			acts: map[symbol.Symbol]testActionEntry{
+				genSym("eq"): {
+					ty:         ActionTypeReduce,
+					production: genProd("l", "ref", "r"),
+				},
+				symbol.SymbolEOF: {
+					ty:         ActionTypeReduce,
+					production: genProd("l", "ref", "r"),
+				},
+			},
+		},
+		{
+			kernelItems: expectedKernels[8],
+			acts: map[symbol.Symbol]testActionEntry{
+				genSym("eq"): {
+					ty:         ActionTypeReduce,
+					production: genProd("r", "l"),
+				},
+				symbol.SymbolEOF: {
+					ty:         ActionTypeReduce,
+					production: genProd("r", "l"),
+				},
+			},
+		},
+		{
+			kernelItems: expectedKernels[9],
+			acts: map[symbol.Symbol]testActionEntry{
+				symbol.SymbolEOF: {
+					ty:         ActionTypeReduce,
+					production: genProd("s", "l", "eq", "r"),
+				},
+			},
+		},
+	}
+
+	t.Run("initial state", func(t *testing.T) {
+		iniState := findStateByNum(automaton.states, ptab.InitialState)
+		if iniState == nil {
+			t.Fatalf("the initial state was not found: #%v", ptab.InitialState)
+		}
+		eIniState, err := newKernel(expectedKernels[0])
+		if err != nil {
+			t.Fatalf("failed to create a kernel item: %v", err)
+		}
+		if iniState.id != eIniState.id {
+			t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id)
+		}
+	})
+
+	for i, eState := range expectedStates {
+		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+			k, err := newKernel(eState.kernelItems)
+			if err != nil {
+				t.Fatalf("failed to create a kernel item: %v", err)
+			}
+			state, ok := automaton.states[k.id]
+			if !ok {
+				t.Fatalf("state was not found: #%v", k.id)
+			}
+
+			testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount)
+			testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount)
+		})
+	}
+}
+
+func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) {
+	nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
+	for eSym, eAct := range expectedState.acts {
+		nonEmptyEntries[eSym.Num()] = struct{}{}
+
+		ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num())
+		if ty != eAct.ty {
+			t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
+		}
+		switch eAct.ty {
+		case ActionTypeShift:
+			eNextState, err := newKernel(eAct.nextState)
+			if err != nil {
+				t.Fatal(err)
+			}
+			nextState := findStateByNum(automaton.states, stateNum)
+			if nextState == nil {
+				t.Fatalf("state was not found; state: #%v", stateNum)
+			}
+			if nextState.id != eNextState.id {
+				t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+			}
+		case ActionTypeReduce:
+			prod := findProductionByNum(gram.productionSet, prodNum)
+			if prod == nil {
+				t.Fatalf("production was not found: #%v", prodNum)
+			}
+			if prod.id != eAct.production.id {
+				t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
+			}
+		}
+	}
+	for symNum := 0; symNum < termCount; symNum++ {
+		if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
+			continue
+		}
+		ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum))
+		if ty != ActionTypeError {
+			t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, production: #%v", state.num, symNum, ty, stateNum, prodNum)
+		}
+	}
+}
+
+func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) {
+	nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
+	for eSym, eGoTo := range expectedState.goTos {
+		nonEmptyEntries[eSym.Num()] = struct{}{}
+
+		eNextState, err := newKernel(eGoTo)
+		if err != nil {
+			t.Fatal(err)
+		}
+		ty, stateNum := ptab.getGoTo(state.num, eSym.Num())
+		if ty != GoToTypeRegistered {
+			t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
+		}
+		nextState := findStateByNum(automaton.states, stateNum)
+		if nextState == nil {
+			t.Fatalf("state was not found: #%v", stateNum)
+		}
+		if nextState.id != eNextState.id {
+			t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
+		}
+	}
+	for symNum := 0; symNum < nonTermCount; symNum++ {
+		if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
+			continue
+		}
+		ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum))
+		if ty != GoToTypeError {
+			t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
+		}
+	}
+}
+
+type testActionEntry struct {
+	ty         ActionType
+	nextState  []*lrItem
+	production *production
+}
+
+func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState {
+	for _, state := range states {
+		if state.num == num {
+			return state
+		}
+	}
+	return nil
+}
+
+func findProductionByNum(prods *productionSet, num productionNum) *production {
+	for _, prod := range prods.getAllProductions() {
+		if prod.num == num {
+			return prod
+		}
+	}
+	return nil
+}
+
+type testSymbolGenerator func(text string) symbol.Symbol
+
+func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
+	return func(text string) symbol.Symbol {
+		t.Helper()
+
+		sym, ok := symTab.ToSymbol(text)
+		if !ok {
+			t.Fatalf("symbol was not found: %v", text)
+		}
+		return sym
+	}
+}
+
+type testProductionGenerator func(lhs string, rhs ...string) *production
+
+func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
+	return func(lhs string, rhs ...string) *production {
+		t.Helper()
+
+		rhsSym := []symbol.Symbol{}
+		for _, text := range rhs {
+			rhsSym = append(rhsSym, genSym(text))
+		}
+		prod, err := newProduction(genSym(lhs), rhsSym)
+		if err != nil {
+			t.Fatalf("failed to create a production: %v", err)
+		}
+
return prod + } +} + +type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem + +func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { + return func(lhs string, dot int, rhs ...string) *lrItem { + t.Helper() + + prod := genProd(lhs, rhs...) + item, err := newLR0Item(prod, dot) + if err != nil { + t.Fatalf("failed to create a LR0 item: %v", err) + } + + return item + } +} + +func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem { + if item.lookAhead.symbols == nil { + item.lookAhead.symbols = map[symbol.Symbol]struct{}{} + } + + for _, a := range lookAhead { + item.lookAhead.symbols[a] = struct{}{} + } + + return item +} diff --git a/tests/unit/grammar/first_test.go b/tests/unit/grammar/first_test.go deleted file mode 100644 index 9625ef6..0000000 --- a/tests/unit/grammar/first_test.go +++ /dev/null @@ -1,219 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type first struct { - lhs string - num int - dot int - symbols []string - empty bool -} - -func TestGenFirst(t *testing.T) { - tests := []struct { - caption string - src string - first []first - }{ - { - caption: "productions contain only non-empty productions", - src: ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -`, - first: []first{ - {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, - {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, - {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, - {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, - {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, - }, - }, - { - caption: "productions contain the empty start production", - src: ` -#name test; - -s - : - ; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "productions contain an empty production", - src: ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a start production contains a non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - | - ; -foo: "foo"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, - {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a production contains non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - ; -foo - : bar - | - ; 
-bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, - {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - fst, gram := genActualFirst(t, tt.src) - - for _, ttFirst := range tt.first { - lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs) - } - - prod, ok := gram.productionSet.findByLHS(lhsSym) - if !ok { - t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym) - } - - actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot) - if err != nil { - t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err) - } - - expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable) - - testFirst(t, actualFirst, expectedFirst) - } - }) - } -} - -func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err := b.build() - if err != nil { - t.Fatal(err) - } - fst, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - if fst == nil { - t.Fatal("genFiest returned nil without any error") - } - - return fst, gram -} - -func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry { - t.Helper() - - entry := newFirstEntry() - if empty { - entry.addEmpty() - } - for _, sym := range symbols { - symSym, ok := symTab.ToSymbol(sym) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", sym) - } - entry.add(symSym) - } - - return entry -} - -func testFirst(t *testing.T, actual, expected *firstEntry) { - if actual.empty != expected.empty { - t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty) - } - - if len(actual.symbols) != len(expected.symbols) { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - - for eSym := range expected.symbols { - if _, ok := actual.symbols[eSym]; !ok { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - } -} diff --git a/tests/unit/grammar/lalr1_test.go b/tests/unit/grammar/lalr1_test.go deleted file mode 100644 index fd09333..0000000 --- a/tests/unit/grammar/lalr1_test.go +++ /dev/null @@ -1,187 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -func TestGenLALR1Automaton(t *testing.T) { - // This grammar belongs to LALR(1) class, not SLR(1). 
- src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var gram *Grammar - var automaton *lalr1Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - - firstSet, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatalf("failed to create a FIRST set: %v", err) - } - - automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) - if err != nil { - t.Fatalf("failed to create a LALR1 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLALR1Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("eq"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "r"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[9], - genSym("l"): 
expectedKernels[8],
-				genSym("ref"): expectedKernels[4],
-				genSym("id"):  expectedKernels[5],
-			},
-			reducibleProds: []*production{},
-		},
-		{
-			kernelItems: expectedKernels[7],
-			nextStates:  map[symbol.Symbol][]*lrItem{},
-			reducibleProds: []*production{
-				genProd("l", "ref", "r"),
-			},
-		},
-		{
-			kernelItems: expectedKernels[8],
-			nextStates:  map[symbol.Symbol][]*lrItem{},
-			reducibleProds: []*production{
-				genProd("r", "l"),
-			},
-		},
-		{
-			kernelItems: expectedKernels[9],
-			nextStates:  map[symbol.Symbol][]*lrItem{},
-			reducibleProds: []*production{
-				genProd("s", "l", "eq", "r"),
-			},
-		},
-	}
-
-	testLRAutomaton(t, expectedStates, automaton.lr0Automaton)
-}
diff --git a/tests/unit/grammar/lexical/compiler_test.go b/tests/unit/grammar/lexical.go
index b621cd2..b621cd2 100644
--- a/tests/unit/grammar/lexical/compiler_test.go
+++ b/tests/unit/grammar/lexical.go
diff --git a/tests/unit/grammar/lexical/dfa/tree_test.go b/tests/unit/grammar/lexical/dfa.go
index de3ebbb..1a3e16a 100644
--- a/tests/unit/grammar/lexical/dfa/tree_test.go
+++ b/tests/unit/grammar/lexical/dfa.go
@@ -9,6 +9,191 @@ import (
 	spec "urubu/spec/grammar"
 )
 
+func TestGenDFA(t *testing.T) {
+	p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+	cpt, err := p.Parse()
+	if err != nil {
+		t.Fatal(err)
+	}
+	bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+		spec.LexModeKindIDMin: cpt,
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+	dfa := GenDFA(bt, symTab)
+	if dfa == nil {
+		t.Fatalf("DFA is nil")
+	}
+
+	symPos := func(n uint16) symbolPosition {
+		pos, err := newSymbolPosition(n, false)
+		if err != nil {
+			panic(err)
+		}
+		return pos
+	}
+
+	endPos := func(n uint16) symbolPosition {
+		pos, err := newSymbolPosition(n, true)
+		if err != nil {
+			panic(err)
+		}
+		return pos
+	}
+
+	s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
+	s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
+	s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
+	s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
+
+	rune2Int := func(char rune, index int) uint8 {
+		return uint8([]byte(string(char))[index])
+	}
+
+	tranS0 := [256]string{}
+	tranS0[rune2Int('a', 0)] = s1.hash()
+	tranS0[rune2Int('b', 0)] = s0.hash()
+
+	tranS1 := [256]string{}
+	tranS1[rune2Int('a', 0)] = s1.hash()
+	tranS1[rune2Int('b', 0)] = s2.hash()
+
+	tranS2 := [256]string{}
+	tranS2[rune2Int('a', 0)] = s1.hash()
+	tranS2[rune2Int('b', 0)] = s3.hash()
+
+	tranS3 := [256]string{}
+	tranS3[rune2Int('a', 0)] = s1.hash()
+	tranS3[rune2Int('b', 0)] = s0.hash()
+
+	expectedTranTab := map[string][256]string{
+		s0.hash(): tranS0,
+		s1.hash(): tranS1,
+		s2.hash(): tranS2,
+		s3.hash(): tranS3,
+	}
+	if len(dfa.TransitionTable) != len(expectedTranTab) {
+		t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
+	}
+	for h, eTranTab := range expectedTranTab {
+		tranTab, ok := dfa.TransitionTable[h]
+		if !ok {
+			t.Errorf("no entry; hash: %v", h)
+			continue
+		}
+		if len(tranTab) != len(eTranTab) {
+			t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
+		}
+		for c, eNext := range eTranTab {
+			if eNext == "" {
+				continue
+			}
+
+			next := tranTab[c]
+			if next == "" {
+				t.Errorf("no entry: hash: %v, char: %v", h, c)
+			}
+			if next != eNext {
+				t.Errorf("next state is
mismatched: want: %v, got: %v", eNext, next) + } + } + } + + if dfa.InitialState != s0.hash() { + t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) + } + + accTab := map[string]spec.LexModeKindID{ + s3.hash(): 1, + } + if len(dfa.AcceptingStatesTable) != len(accTab) { + t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) + } + for eState, eID := range accTab { + id, ok := dfa.AcceptingStatesTable[eState] + if !ok { + t.Errorf("accepting state is not found: state: %v", eState) + } + if id != eID { + t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) + } + } +} + +func TestNewSymbolPosition(t *testing.T) { + tests := []struct { + n uint16 + endMark bool + err bool + }{ + { + n: 0, + endMark: false, + err: true, + }, + { + n: 0, + endMark: true, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMin - 1, + endMark: true, + err: true, + }, + { + n: symbolPositionMin, + endMark: false, + }, + { + n: symbolPositionMin, + endMark: true, + }, + { + n: symbolPositionMax, + endMark: false, + }, + { + n: symbolPositionMax, + endMark: true, + }, + { + n: symbolPositionMax + 1, + endMark: false, + err: true, + }, + { + n: symbolPositionMax + 1, + endMark: true, + err: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { + pos, err := newSymbolPosition(tt.n, tt.endMark) + if tt.err { + if err == nil { + t.Fatal("err is nil") + } + return + } + if err != nil { + t.Fatal(err) + } + n, endMark := pos.describe() + if n != tt.n || endMark != tt.endMark { + t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) + } + }) + } +} + func TestByteTree(t *testing.T) { tests := []struct { root byteTree diff --git a/tests/unit/grammar/lexical/dfa/dfa_test.go b/tests/unit/grammar/lexical/dfa/dfa_test.go deleted file mode 100644 index 38577cf..0000000 --- a/tests/unit/grammar/lexical/dfa/dfa_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package dfa - -import ( - "strings" - "testing" - - "urubu/grammar/lexical/parser" - spec "urubu/spec/grammar" -) - -func TestGenDFA(t *testing.T) { - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - dfa := GenDFA(bt, symTab) - if dfa == nil { - t.Fatalf("DFA is nil") - } - - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) - s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) - s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) - s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) - - rune2Int := func(char rune, index int) uint8 { - return uint8([]byte(string(char))[index]) - } - - tranS0 := [256]string{} - tranS0[rune2Int('a', 0)] = s1.hash() - tranS0[rune2Int('b', 0)] = s0.hash() - - tranS1 := 
[256]string{} - tranS1[rune2Int('a', 0)] = s1.hash() - tranS1[rune2Int('b', 0)] = s2.hash() - - tranS2 := [256]string{} - tranS2[rune2Int('a', 0)] = s1.hash() - tranS2[rune2Int('b', 0)] = s3.hash() - - tranS3 := [256]string{} - tranS3[rune2Int('a', 0)] = s1.hash() - tranS3[rune2Int('b', 0)] = s0.hash() - - expectedTranTab := map[string][256]string{ - s0.hash(): tranS0, - s1.hash(): tranS1, - s2.hash(): tranS2, - s3.hash(): tranS3, - } - if len(dfa.TransitionTable) != len(expectedTranTab) { - t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) - } - for h, eTranTab := range expectedTranTab { - tranTab, ok := dfa.TransitionTable[h] - if !ok { - t.Errorf("no entry; hash: %v", h) - continue - } - if len(tranTab) != len(eTranTab) { - t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) - } - for c, eNext := range eTranTab { - if eNext == "" { - continue - } - - next := tranTab[c] - if next == "" { - t.Errorf("no enatry: hash: %v, char: %v", h, c) - } - if next != eNext { - t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) - } - } - } - - if dfa.InitialState != s0.hash() { - t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) - } - - accTab := map[string]spec.LexModeKindID{ - s3.hash(): 1, - } - if len(dfa.AcceptingStatesTable) != len(accTab) { - t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) - } - for eState, eID := range accTab { - id, ok := dfa.AcceptingStatesTable[eState] - if !ok { - t.Errorf("accepting state is not found: state: %v", eState) - } - if id != eID { - t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) - } - } -} diff --git a/tests/unit/grammar/lexical/dfa/symbol_position_test.go b/tests/unit/grammar/lexical/dfa/symbol_position_test.go deleted file mode 100644 index c867f64..0000000 --- a/tests/unit/grammar/lexical/dfa/symbol_position_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package dfa - -import ( - "fmt" - "testing" -) - -func TestNewSymbolPosition(t *testing.T) { - tests := []struct { - n uint16 - endMark bool - err bool - }{ - { - n: 0, - endMark: false, - err: true, - }, - { - n: 0, - endMark: true, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: true, - err: true, - }, - { - n: symbolPositionMin, - endMark: false, - }, - { - n: symbolPositionMin, - endMark: true, - }, - { - n: symbolPositionMax, - endMark: false, - }, - { - n: symbolPositionMax, - endMark: true, - }, - { - n: symbolPositionMax + 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMax + 1, - endMark: true, - err: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { - pos, err := newSymbolPosition(tt.n, tt.endMark) - if tt.err { - if err == nil { - t.Fatal("err is nil") - } - return - } - if err != nil { - t.Fatal(err) - } - n, endMark := pos.describe() - if n != tt.n || endMark != tt.endMark { - t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) - } - }) - } -} diff --git a/tests/unit/grammar/lexical/parser/parser_test.go b/tests/unit/grammar/lexical/parser.go index 4c9557d..d5d7039 100644 --- a/tests/unit/grammar/lexical/parser/parser_test.go +++ 
b/tests/unit/grammar/lexical/parser.go
@@ -10,6 +10,527 @@ import (
 	"urubu/ucd"
 )
 
+func TestLexer(t *testing.T) {
+	tests := []struct {
+		caption string
+		src     string
+		tokens  []*token
+		err     error
+	}{
+		{
+			caption: "lexer can recognize ordinary characters",
+			src: "123abcいろは",
+			tokens: []*token{
+				newToken(tokenKindChar, '1'),
+				newToken(tokenKindChar, '2'),
+				newToken(tokenKindChar, '3'),
+				newToken(tokenKindChar, 'a'),
+				newToken(tokenKindChar, 'b'),
+				newToken(tokenKindChar, 'c'),
+				newToken(tokenKindChar, 'い'),
+				newToken(tokenKindChar, 'ろ'),
+				newToken(tokenKindChar, 'は'),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the special characters in default mode",
+			src: ".*+?|()[\\u",
+			tokens: []*token{
+				newToken(tokenKindAnyChar, nullChar),
+				newToken(tokenKindRepeat, nullChar),
+				newToken(tokenKindRepeatOneOrMore, nullChar),
+				newToken(tokenKindOption, nullChar),
+				newToken(tokenKindAlt, nullChar),
+				newToken(tokenKindGroupOpen, nullChar),
+				newToken(tokenKindGroupClose, nullChar),
+				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindCodePointLeader, nullChar),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the escape sequences in default mode",
+			src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
+			tokens: []*token{
+				newToken(tokenKindChar, '\\'),
+				newToken(tokenKindChar, '.'),
+				newToken(tokenKindChar, '*'),
+				newToken(tokenKindChar, '+'),
+				newToken(tokenKindChar, '?'),
+				newToken(tokenKindChar, '|'),
+				newToken(tokenKindChar, '('),
+				newToken(tokenKindChar, ')'),
+				newToken(tokenKindChar, '['),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "], {, and } are treated as ordinary characters in default mode",
+			src: "]{}",
+			tokens: []*token{
+				newToken(tokenKindChar, ']'),
+				newToken(tokenKindChar, '{'),
+				newToken(tokenKindChar, '}'),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the special characters in bracket expression mode",
+			src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindChar, 'a'),
+				newToken(tokenKindCharRange, nullChar),
+				newToken(tokenKindChar, 'z'),
+				newToken(tokenKindCodePointLeader, nullChar),
+				newToken(tokenKindLBrace, nullChar),
+				newCodePointToken("09AF"),
+				newToken(tokenKindRBrace, nullChar),
+				newToken(tokenKindBExpClose, nullChar),
+				newToken(tokenKindInverseBExpOpen, nullChar),
+				newToken(tokenKindChar, 'a'),
+				newToken(tokenKindCharRange, nullChar),
+				newToken(tokenKindChar, 'z'),
+				newToken(tokenKindCodePointLeader, nullChar),
+				newToken(tokenKindLBrace, nullChar),
+				newCodePointToken("09abcf"),
+				newToken(tokenKindRBrace, nullChar),
+				newToken(tokenKindBExpClose, nullChar),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the escape sequences in bracket expression mode",
+			src: "[\\^a\\-z]",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindChar, '^'),
+				newToken(tokenKindChar, 'a'),
+				newToken(tokenKindChar, '-'),
+				newToken(tokenKindChar, 'z'),
+				newToken(tokenKindBExpClose, nullChar),
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "in a bracket expression, the special characters are also handled as normal characters",
+			src: "[\\\\.*+?|()[",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+				newToken(tokenKindChar, '\\'),
+				newToken(tokenKindChar, '.'),
+				newToken(tokenKindChar, '*'),
+				newToken(tokenKindChar, '+'),
+				newToken(tokenKindChar, '?'),
+				newToken(tokenKindChar, 
'|'), + newToken(tokenKindChar, '('), + newToken(tokenKindChar, ')'), + newToken(tokenKindChar, '['), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // [...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character-range expression. + // b. *-, -*, or - are handled as ordinary characters. + src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", + // [^...^...][^] + // ~~ ~ ~~ + // ^ ^ ^^ + // | | |`-- Ordinary Character (c) + // | | `-- Bracket Expression + // | `-- Ordinary Character (b) + // `-- Inverse Bracket Expression (a) + // + // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. + // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. + // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. 
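+			// For example (following rules (a)-(c) above), `[^^]` lexes as an
+			// inverse bracket expression containing the ordinary character '^',
+			// while `[^]` lexes as a bracket expression containing only '^'.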
+ src: "[^^][^]", + tokens: []*token{ + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '^'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "\\@", + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "\\", + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer raises an error when an invalid escape sequence appears", + src: "[\\@", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrInvalidEscSeq, + }, + { + caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", + src: "[\\", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + }, + err: synErrIncompletedEscSeq, + }, + { + caption: "lexer can recognize the special characters and code points in code point expression mode", + src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("4567"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("89abcd"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + 
newCodePointToken("efAB"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("CDEF01"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { + caption: "a one digit hex string isn't a valid code point", + src: "\\u{0", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a two digits hex string isn't a valid code point", + src: "\\u{01", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a three digits hex string isn't a valid code point", + src: "\\u{012", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a four digits hex string is a valid code point", + src: "\\u{0123}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("0123"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a five digits hex string isn't a valid code point", + src: "\\u{01234", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a six digits hex string is a valid code point", + src: "\\u{012345}", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCodePointToken("012345"), + newToken(tokenKindRBrace, nullChar), + }, + }, + { + caption: "a seven digits hex string isn't a valid code point", + src: "\\u{0123456", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{g", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "a code point must be hex digits", + src: "\\u{G", + tokens: []*token{ + newToken(tokenKindCodePointLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + }, + err: synErrInvalidCodePoint, + }, + { + caption: "lexer can recognize the special characters and symbols in character property expression mode", + src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", + tokens: []*token{ + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("Letter"), + newToken(tokenKindRBrace, nullChar), + newToken(tokenKindCharPropLeader, nullChar), + newToken(tokenKindLBrace, nullChar), + newCharPropSymbolToken("General_Category"), + newToken(tokenKindEqual, nullChar), + newCharPropSymbolToken("Letter"), + 
+				newToken(tokenKindBExpClose, nullChar),
+
+				newToken(tokenKindInverseBExpOpen, nullChar),
+				newToken(tokenKindCharPropLeader, nullChar),
+				newToken(tokenKindLBrace, nullChar),
+				newCharPropSymbolToken("Letter"),
+				newToken(tokenKindRBrace, nullChar),
+				newToken(tokenKindCharPropLeader, nullChar),
+				newToken(tokenKindLBrace, nullChar),
+				newCharPropSymbolToken("General_Category"),
+				newToken(tokenKindEqual, nullChar),
+				newCharPropSymbolToken("Letter"),
+				newToken(tokenKindRBrace, nullChar),
+				newToken(tokenKindBExpClose, nullChar),
+
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "lexer can recognize the special characters and symbols in fragment expression mode",
+			src: "\\f{integer}",
+			tokens: []*token{
+				newToken(tokenKindFragmentLeader, nullChar),
+				newToken(tokenKindLBrace, nullChar),
+				newFragmentSymbolToken("integer"),
+				newToken(tokenKindRBrace, nullChar),
+
+				newToken(tokenKindEOF, nullChar),
+			},
+		},
+		{
+			caption: "a fragment expression is not supported in a bracket expression",
+			src: "[\\f",
+			tokens: []*token{
+				newToken(tokenKindBExpOpen, nullChar),
+			},
+			err: synErrInvalidEscSeq,
+		},
+		{
+			caption: "a fragment expression is not supported in an inverse bracket expression",
+			src: "[^\\f",
+			tokens: []*token{
+				newToken(tokenKindInverseBExpOpen, nullChar),
+			},
+			err: synErrInvalidEscSeq,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.caption, func(t *testing.T) {
+			lex := newLexer(strings.NewReader(tt.src))
+			var err error
+			var tok *token
+			i := 0
+			for {
+				tok, err = lex.next()
+				if err != nil {
+					break
+				}
+				if i >= len(tt.tokens) {
+					break
+				}
+				eTok := tt.tokens[i]
+				i++
+				testToken(t, tok, eTok)
+
+				if tok.kind == tokenKindEOF {
+					break
+				}
+			}
+			if tt.err != nil {
+				if err != ParseErr {
+					t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
+				}
+				detail, cause := lex.error()
+				if cause != tt.err {
+					t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
+				}
+			} else {
+				if err != nil {
+					t.Fatalf("unexpected error: %v", err)
+				}
+			}
+			if i < len(tt.tokens) {
+				t.Fatalf("expected more tokens")
+			}
+		})
+	}
+}
+
+func testToken(t *testing.T, a, e *token) {
+	t.Helper()
+	if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
+		t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
+	}
+}
+
 func TestParse(t *testing.T) {
 	tests := []struct {
 		pattern string
diff --git a/tests/unit/grammar/lexical/parser/lexer_test.go b/tests/unit/grammar/lexical/parser/lexer_test.go
deleted file mode 100644
index 055466e..0000000
--- a/tests/unit/grammar/lexical/parser/lexer_test.go
+++ /dev/null
@@ -1,524 +0,0 @@
-package parser
-
-import (
-	"strings"
-	"testing"
-)
-
-func TestLexer(t *testing.T) {
-	tests := []struct {
-		caption string
-		src     string
-		tokens  []*token
-		err     error
-	}{
-		{
-			caption: "lexer can recognize ordinaly characters",
-			src: "123abcいろは",
-			tokens: []*token{
-				newToken(tokenKindChar, '1'),
-				newToken(tokenKindChar, '2'),
-				newToken(tokenKindChar, '3'),
-				newToken(tokenKindChar, 'a'),
-				newToken(tokenKindChar, 'b'),
-				newToken(tokenKindChar, 'c'),
-				newToken(tokenKindChar, 'い'),
-				newToken(tokenKindChar, 'ろ'),
-				newToken(tokenKindChar, 'は'),
-				newToken(tokenKindEOF, nullChar),
-			},
-		},
-		{
-			caption: "lexer can recognize the special characters in default mode",
-			src: ".*+?|()[\\u",
-			tokens: []*token{
-				newToken(tokenKindAnyChar, nullChar),
-				newToken(tokenKindRepeat, nullChar),
-				newToken(tokenKindRepeatOneOrMore, nullChar),
-				newToken(tokenKindOption, 
nullChar), - newToken(tokenKindAlt, nullChar), - newToken(tokenKindGroupOpen, nullChar), - newToken(tokenKindGroupClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in default mode", - src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", - tokens: []*token{ - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "], {, and } are treated as an ordinary character in default mode", - src: "]{}", - tokens: []*token{ - newToken(tokenKindChar, ']'), - newToken(tokenKindChar, '{'), - newToken(tokenKindChar, '}'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in bracket expression mode", - src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09AF"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09abcf"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in bracket expression mode", - src: "[\\^a\\-z]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "in a bracket expression, the special characters are also handled as normal characters", - src: "[\\\\.*+?|()[", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", - // [...-...][...-][-...][-] - // ~~~~~~~ ~ ~ ~ - // ^ ^ ^ ^ - // | | | `-- Ordinary Character (b) - // | | `-- Ordinary Character (b) - // | `-- Ordinary Character (b) - // `-- Character Range (a) - // - // a. *-* is handled as a character-range expression. - // b. *-, -*, or - are handled as ordinary characters. 
- src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", - // [^...^...][^] - // ~~ ~ ~~ - // ^ ^ ^^ - // | | |`-- Ordinary Character (c) - // | | `-- Bracket Expression - // | `-- Ordinary Character (b) - // `-- Inverse Bracket Expression (a) - // - // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. - // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. - // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. 
- src: "[^^][^]", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "\\@", - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "\\", - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "[\\@", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "[\\", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer can recognize the special characters and code points in code point expression mode", - src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - 
newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a one digit hex string isn't a valid code point", - src: "\\u{0", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a two digits hex string isn't a valid code point", - src: "\\u{01", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a three digits hex string isn't a valid code point", - src: "\\u{012", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a four digits hex string is a valid code point", - src: "\\u{0123}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a five digits hex string isn't a valid code point", - src: "\\u{01234", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a six digits hex string is a valid code point", - src: "\\u{012345}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("012345"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a seven digits hex string isn't a valid code point", - src: "\\u{0123456", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{g", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{G", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "lexer can recognize the special characters and symbols in character property expression mode", - src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", - tokens: []*token{ - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - 
newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters and symbols in fragment expression mode", - src: "\\f{integer}", - tokens: []*token{ - newToken(tokenKindFragmentLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newFragmentSymbolToken("integer"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a fragment expression is not supported in a bracket expression", - src: "[\\f", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "a fragment expression is not supported in an inverse bracket expression", - src: "[^\\f", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - lex := newLexer(strings.NewReader(tt.src)) - var err error - var tok *token - i := 0 - for { - tok, err = lex.next() - if err != nil { - break - } - if i >= len(tt.tokens) { - break - } - eTok := tt.tokens[i] - i++ - testToken(t, tok, eTok) - - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - detail, cause := lex.error() - if cause != tt.err { - t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - if i < len(tt.tokens) { - t.Fatalf("expecte more tokens") - } - }) - } -} - -func testToken(t *testing.T, a, e *token) { - t.Helper() - if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { - t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) - } -} diff --git a/tests/unit/grammar/lr0_test.go b/tests/unit/grammar/lr0_test.go deleted file mode 100644 index 0a9ec24..0000000 --- a/tests/unit/grammar/lr0_test.go +++ /dev/null @@ -1,448 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type expectedLRState struct { - kernelItems []*lrItem - nextStates map[symbol.Symbol][]*lrItem - reducibleProds []*production - emptyProdItems []*lrItem -} - -func TestGenLR0Automaton(t *testing.T) { - src := ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == 
nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("expr'", 0, "expr"), - }, - 1: { - genLR0Item("expr'", 1, "expr"), - genLR0Item("expr", 1, "expr", "add", "term"), - }, - 2: { - genLR0Item("expr", 1, "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 3: { - genLR0Item("term", 1, "factor"), - }, - 4: { - genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), - }, - 5: { - genLR0Item("factor", 1, "id"), - }, - 6: { - genLR0Item("expr", 2, "expr", "add", "term"), - }, - 7: { - genLR0Item("term", 2, "term", "mul", "factor"), - }, - 8: { - genLR0Item("expr", 1, "expr", "add", "term"), - genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), - }, - 9: { - genLR0Item("expr", 3, "expr", "add", "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 10: { - genLR0Item("term", 3, "term", "mul", "factor"), - }, - 11: { - genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[1], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("expr'", "expr"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "term"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "factor"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[8], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("term"): expectedKernels[9], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("factor"): expectedKernels[10], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - genSym("r_paren"): expectedKernels[11], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): 
expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "expr", "add", "term"), - }, - }, - { - kernelItems: expectedKernels[10], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "term", "mul", "factor"), - }, - }, - { - kernelItems: expectedKernels[11], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "l_paren", "expr", "r_paren"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { - src := ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar - : b - | - ; - -b: "bar"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("s'", 0, "s"), - }, - 1: { - genLR0Item("s'", 1, "s"), - }, - 2: { - genLR0Item("s", 1, "foo", "bar"), - }, - 3: { - genLR0Item("s", 2, "foo", "bar"), - }, - 4: { - genLR0Item("bar", 1, "b"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("foo"): expectedKernels[2], - }, - reducibleProds: []*production{ - genProd("foo"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("foo", 0), - }, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("bar"): expectedKernels[3], - genSym("b"): expectedKernels[4], - }, - reducibleProds: []*production{ - genProd("bar"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("bar", 0), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "foo", "bar"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("bar", "b"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) { - if len(automaton.states) != len(expected) { - t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states)) - } - - for i, eState := range expected { - t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("a kernel was not found: %v", k.id) - } - - // test look-ahead symbols - { - if len(state.kernel.items) != len(eState.kernelItems) { - 
t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) - } - for _, eKItem := range eState.kernelItems { - var kItem *lrItem - for _, it := range state.kernel.items { - if it.id != eKItem.id { - continue - } - kItem = it - break - } - if kItem == nil { - t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) - } - - if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { - t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) - } - - for eSym := range eKItem.lookAhead.symbols { - if _, ok := kItem.lookAhead.symbols[eSym]; !ok { - t.Errorf("look-ahead symbol not found: %v", eSym) - } - } - } - } - - // test next states - { - if len(state.next) != len(eState.nextStates) { - t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) - } - for eSym, eKItems := range eState.nextStates { - nextStateKernel, err := newKernel(eKItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - nextState, ok := state.next[eSym] - if !ok { - t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) - } - if nextState != nextStateKernel.id { - t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) - } - } - } - - // test reducible productions - { - if len(state.reducible) != len(eState.reducibleProds) { - t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) - } - for _, eProd := range eState.reducibleProds { - if _, ok := state.reducible[eProd.id]; !ok { - t.Errorf("reducible production was not found: %v", eProd.id) - } - } - - if len(state.emptyProdItems) != len(eState.emptyProdItems) { - t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) - } - for _, eItem := range eState.emptyProdItems { - found := false - for _, item := range state.emptyProdItems { - if item.id != eItem.id { - continue - } - found = true - break - } - if !found { - t.Errorf("empty production item not found: %v", eItem.id) - } - } - } - }) - } -} diff --git a/tests/unit/grammar/parsing_table_test.go b/tests/unit/grammar/parsing_table_test.go deleted file mode 100644 index 342e187..0000000 --- a/tests/unit/grammar/parsing_table_test.go +++ /dev/null @@ -1,387 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "urubu/grammar/symbol" - "urubu/spec/grammar/parser" -) - -type expectedState struct { - kernelItems []*lrItem - acts map[symbol.Symbol]testActionEntry - goTos map[symbol.Symbol][]*lrItem -} - -func TestGenLALRParsingTable(t *testing.T) { - src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var ptab *ParsingTable - var automaton *lalr1Automaton - var gram *Grammar - var nonTermCount int - var termCount int - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - first, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatal(err) - } - automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) - if err != nil { - 
t.Fatal(err) - } - - nonTermTexts, err := gram.symbolTable.NonTerminalTexts() - if err != nil { - t.Fatal(err) - } - termTexts, err := gram.symbolTable.TerminalTexts() - if err != nil { - t.Fatal(err) - } - nonTermCount = len(nonTermTexts) - termCount = len(termTexts) - - lalr := &lrTableBuilder{ - automaton: automaton.lr0Automaton, - prods: gram.productionSet, - termCount: termCount, - nonTermCount: nonTermCount, - symTab: gram.symbolTable, - } - ptab, err = lalr.build() - if err != nil { - t.Fatalf("failed to create a LALR parsing table: %v", err) - } - if ptab == nil { - t.Fatal("genLALRParsingTable returns nil without any error") - } - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []expectedState{ - { - kernelItems: expectedKernels[0], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[1], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s'", "s"), - }, - }, - }, - { - kernelItems: expectedKernels[2], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeShift, - nextState: expectedKernels[6], - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[3], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[4], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - }, - }, - { - kernelItems: expectedKernels[5], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - }, - }, - { - kernelItems: expectedKernels[6], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: 
expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("l"): expectedKernels[8], - genSym("r"): expectedKernels[9], - }, - }, - { - kernelItems: expectedKernels[7], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[8], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[9], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "l", "eq", "r"), - }, - }, - }, - } - - t.Run("initial state", func(t *testing.T) { - iniState := findStateByNum(automaton.states, ptab.InitialState) - if iniState == nil { - t.Fatalf("the initial state was not found: #%v", ptab.InitialState) - } - eIniState, err := newKernel(expectedKernels[0]) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - if iniState.id != eIniState.id { - t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) - } - }) - - for i, eState := range expectedStates { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("state was not found: #%v", 0) - } - - testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) - testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) - }) - } -} - -func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eAct := range expectedState.acts { - nonEmptyEntries[eSym.Num()] = struct{}{} - - ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num()) - if ty != eAct.ty { - t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) - } - switch eAct.ty { - case ActionTypeShift: - eNextState, err := newKernel(eAct.nextState) - if err != nil { - t.Fatal(err) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found; state: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - case ActionTypeReduce: - prod := findProductionByNum(gram.productionSet, prodNum) - if prod == nil { - t.Fatalf("production was not found: #%v", prodNum) - } - if prod.id != eAct.production.id { - t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) - } - } - } - for symNum := 0; symNum < termCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum)) - if ty != ActionTypeError { - t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) - } - } -} - -func testGoTo(t *testing.T, expectedState 
*expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eGoTo := range expectedState.goTos { - nonEmptyEntries[eSym.Num()] = struct{}{} - - eNextState, err := newKernel(eGoTo) - if err != nil { - t.Fatal(err) - } - ty, stateNum := ptab.getGoTo(state.num, eSym.Num()) - if ty != GoToTypeRegistered { - t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - } - for symNum := 0; symNum < nonTermCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum)) - if ty != GoToTypeError { - t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) - } - } -} - -type testActionEntry struct { - ty ActionType - nextState []*lrItem - production *production -} - -func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { - for _, state := range states { - if state.num == num { - return state - } - } - return nil -} - -func findProductionByNum(prods *productionSet, num productionNum) *production { - for _, prod := range prods.getAllProductions() { - if prod.num == num { - return prod - } - } - return nil -} diff --git a/tests/unit/grammar/symbol/symbol_test.go b/tests/unit/grammar/symbol.go index 31c3edd..31c3edd 100644 --- a/tests/unit/grammar/symbol/symbol_test.go +++ b/tests/unit/grammar/symbol.go diff --git a/tests/unit/grammar/test_helper_test.go b/tests/unit/grammar/test_helper_test.go deleted file mode 100644 index 546d2c1..0000000 --- a/tests/unit/grammar/test_helper_test.go +++ /dev/null @@ -1,68 +0,0 @@ -package grammar - -import ( - "testing" - - "urubu/grammar/symbol" -) - -type testSymbolGenerator func(text string) symbol.Symbol - -func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator { - return func(text string) symbol.Symbol { - t.Helper() - - sym, ok := symTab.ToSymbol(text) - if !ok { - t.Fatalf("symbol was not found: %v", text) - } - return sym - } -} - -type testProductionGenerator func(lhs string, rhs ...string) *production - -func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator { - return func(lhs string, rhs ...string) *production { - t.Helper() - - rhsSym := []symbol.Symbol{} - for _, text := range rhs { - rhsSym = append(rhsSym, genSym(text)) - } - prod, err := newProduction(genSym(lhs), rhsSym) - if err != nil { - t.Fatalf("failed to create a production: %v", err) - } - - return prod - } -} - -type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem - -func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator { - return func(lhs string, dot int, rhs ...string) *lrItem { - t.Helper() - - prod := genProd(lhs, rhs...) 
- item, err := newLR0Item(prod, dot) - if err != nil { - t.Fatalf("failed to create a LR0 item: %v", err) - } - - return item - } -} - -func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem { - if item.lookAhead.symbols == nil { - item.lookAhead.symbols = map[symbol.Symbol]struct{}{} - } - - for _, a := range lookAhead { - item.lookAhead.symbols[a] = struct{}{} - } - - return item -} diff --git a/tests/unit/spec/grammar/parser/parser_test.go b/tests/unit/spec/grammar/parser.go index 4161f6b..773c466 100644 --- a/tests/unit/spec/grammar/parser/parser_test.go +++ b/tests/unit/spec/grammar/parser.go @@ -7,6 +7,234 @@ import ( verr "urubu/error" ) +func TestLexer_Run(t *testing.T) { + idTok := func(text string) *token { + return newIDToken(text, newPosition(1, 0)) + } + + termPatTok := func(text string) *token { + return newTerminalPatternToken(text, newPosition(1, 0)) + } + + strTok := func(text string) *token { + return newStringLiteralToken(text, newPosition(1, 0)) + } + + symTok := func(kind tokenKind) *token { + return newSymbolToken(kind, newPosition(1, 0)) + } + + invalidTok := func(text string) *token { + return newInvalidToken(text, newPosition(1, 0)) + } + + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "the lexer can recognize all kinds of tokens", + src: `id"terminal"'string':|;@...#$()`, + tokens: []*token{ + idTok("id"), + termPatTok("terminal"), + strTok(`string`), + symTok(tokenKindColon), + symTok(tokenKindOr), + symTok(tokenKindSemicolon), + symTok(tokenKindLabelMarker), + symTok(tokenKindExpantion), + symTok(tokenKindDirectiveMarker), + symTok(tokenKindOrderedSymbolMarker), + symTok(tokenKindLParen), + symTok(tokenKindRParen), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize keywords", + src: `fragment`, + tokens: []*token{ + symTok(tokenKindKWFragment), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize character sequences and escape sequences in a terminal", + src: `"abc\"\\"`, + tokens: []*token{ + termPatTok(`abc"\\`), + newEOFToken(), + }, + }, + { + caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", + src: `'\\\'`, + tokens: []*token{ + strTok(`\\\`), + newEOFToken(), + }, + }, + { + caption: "a pattern must include at least one character", + src: `""`, + err: synErrEmptyPattern, + }, + { + caption: "a string must include at least one character", + src: `''`, + err: synErrEmptyString, + }, + { + caption: "the lexer can recognize newlines and combine consecutive newlines into one", + src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", + tokens: []*token{ + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "the lexer ignores line comments", + src: ` +// This is the first comment. +foo +// This is the second comment. +// This is the third comment. +bar // This is the fourth comment. 
+`, + tokens: []*token{ + symTok(tokenKindNewline), + idTok("foo"), + symTok(tokenKindNewline), + idTok("bar"), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "an identifier cannot contain uppercase letters", + src: `Abc`, + err: synErrIDInvalidChar, + }, + { + caption: "an identifier cannot contain uppercase letters", + src: `Zyx`, + err: synErrIDInvalidChar, + }, + { + caption: "the underscore cannot be placed at the beginning of an identifier", + src: `_abc`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed at the end of an identifier", + src: `abc_`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed consecutively", + src: `a__b`, + err: synErrIDConsecutiveUnderscores, + }, + { + caption: "digits cannot be placed at the beginning of an identifier", + src: `0abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "digits cannot be placed at the beginning of an identifier", + src: `9abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "an unclosed terminal is not a valid token", + src: `"abc`, + err: synErrUnclosedTerminal, + }, + { + caption: "an incomplete escape sequence in a pattern is not a valid token", + src: `"\`, + err: synErrIncompletedEscSeq, + }, + { + caption: "an unclosed string is not a valid token", + src: `'abc`, + err: synErrUnclosedString, + }, + { + caption: "the lexer can recognize valid tokens following an invalid token", + src: `abc!!!def`, + tokens: []*token{ + idTok("abc"), + invalidTok("!!!"), + idTok("def"), + newEOFToken(), + }, + }, + { + caption: "the lexer skips white spaces", + // \u0009: HT + // \u0020: SP + src: "a\u0009b\u0020c", + tokens: []*token{ + idTok("a"), + idTok("b"), + idTok("c"), + newEOFToken(), + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + l, err := newLexer(strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + n := 0 + for { + var tok *token + tok, err = l.next() + if err != nil { + break + } + testToken(t, tok, tt.tokens[n]) + n++ + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + synErr, ok := err.(*verr.SpecError) + if !ok { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + if tt.err != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) + } + } else { + if err != nil { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + } + }) + } +} + +func testToken(t *testing.T, tok, expected *token) { + t.Helper() + if tok.kind != expected.kind || tok.text != expected.text { + t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) + } +} + func TestParse(t *testing.T) { name := func(param *ParameterNode) *DirectiveNode { return &DirectiveNode{ diff --git a/tests/unit/spec/grammar/parser/lexer_test.go b/tests/unit/spec/grammar/parser/lexer_test.go deleted file mode 100644 index c402b42..0000000 --- a/tests/unit/spec/grammar/parser/lexer_test.go +++ /dev/null @@ -1,236 +0,0 @@ -package parser - -import ( - "strings" - "testing" - - verr "urubu/error" -) - -func TestLexer_Run(t *testing.T) { - idTok := func(text string) *token { - return newIDToken(text, newPosition(1, 0)) - } - - termPatTok := func(text string) *token { - return newTerminalPatternToken(text, newPosition(1, 0)) - } - - strTok := func(text string) *token { - return newStringLiteralToken(text, newPosition(1, 0)) - } - - symTok := func(kind tokenKind) *token { - return newSymbolToken(kind, newPosition(1,
0)) - } - - invalidTok := func(text string) *token { - return newInvalidToken(text, newPosition(1, 0)) - } - - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "the lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;@...#$()`, - tokens: []*token{ - idTok("id"), - termPatTok("terminal"), - strTok(`string`), - symTok(tokenKindColon), - symTok(tokenKindOr), - symTok(tokenKindSemicolon), - symTok(tokenKindLabelMarker), - symTok(tokenKindExpantion), - symTok(tokenKindDirectiveMarker), - symTok(tokenKindOrderedSymbolMarker), - symTok(tokenKindLParen), - symTok(tokenKindRParen), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize keywords", - src: `fragment`, - tokens: []*token{ - symTok(tokenKindKWFragment), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize character sequences and escape sequences in a terminal", - src: `"abc\"\\"`, - tokens: []*token{ - termPatTok(`abc"\\`), - newEOFToken(), - }, - }, - { - caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", - src: `'\\\'`, - tokens: []*token{ - strTok(`\\\`), - newEOFToken(), - }, - }, - { - caption: "a pattern must include at least one character", - src: `""`, - err: synErrEmptyPattern, - }, - { - caption: "a string must include at least one character", - src: `''`, - err: synErrEmptyString, - }, - { - caption: "the lexer can recognize newlines and combine consecutive newlines into one", - src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", - tokens: []*token{ - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "the lexer ignores line comments", - src: ` -// This is the first comment. -foo -// This is the second comment. -// This is the third comment. -bar // This is the fourth comment. 
-`, - tokens: []*token{ - symTok(tokenKindNewline), - idTok("foo"), - symTok(tokenKindNewline), - idTok("bar"), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "an identifier cannot contain uppercase letters", - src: `Abc`, - err: synErrIDInvalidChar, - }, - { - caption: "an identifier cannot contain uppercase letters", - src: `Zyx`, - err: synErrIDInvalidChar, - }, - { - caption: "the underscore cannot be placed at the beginning of an identifier", - src: `_abc`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed at the end of an identifier", - src: `abc_`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed consecutively", - src: `a__b`, - err: synErrIDConsecutiveUnderscores, - }, - { - caption: "digits cannot be placed at the beginning of an identifier", - src: `0abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "digits cannot be placed at the beginning of an identifier", - src: `9abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "an unclosed terminal is not a valid token", - src: `"abc`, - err: synErrUnclosedTerminal, - }, - { - caption: "an incomplete escape sequence in a pattern is not a valid token", - src: `"\`, - err: synErrIncompletedEscSeq, - }, - { - caption: "an unclosed string is not a valid token", - src: `'abc`, - err: synErrUnclosedString, - }, - { - caption: "the lexer can recognize valid tokens following an invalid token", - src: `abc!!!def`, - tokens: []*token{ - idTok("abc"), - invalidTok("!!!"), - idTok("def"), - newEOFToken(), - }, - }, - { - caption: "the lexer skips white spaces", - // \u0009: HT - // \u0020: SP - src: "a\u0009b\u0020c", - tokens: []*token{ - idTok("a"), - idTok("b"), - idTok("c"), - newEOFToken(), - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - l, err := newLexer(strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - n := 0 - for { - var tok *token - tok, err = l.next() - if err != nil { - break - } - testToken(t, tok, tt.tokens[n]) - n++ - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - synErr, ok := err.(*verr.SpecError) - if !ok { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - if tt.err != synErr.Cause { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) - } - } else { - if err != nil { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - } - }) - } -} - -func testToken(t *testing.T, tok, expected *token) { - t.Helper() - if tok.kind != expected.kind || tok.text != expected.text { - t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) - } -} diff --git a/tests/unit/spec/test/parser_test.go b/tests/unit/spec/test.go index eddba92..eddba92 100644 --- a/tests/unit/spec/test/parser_test.go +++ b/tests/unit/spec/test.go diff --git a/tests/unit/tester/tester_test.go b/tests/unit/tester.go index 3c6b1db..3c6b1db 100644 --- a/tests/unit/tester/tester_test.go +++ b/tests/unit/tester.go diff --git a/tests/unit/utf8/utf8_test.go b/tests/unit/utf8.go index 2dc8093..2dc8093 100644 --- a/tests/unit/utf8/utf8_test.go +++ b/tests/unit/utf8.go
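A note on the consolidated helper code in test_helper_test.go above: the fixtures are built from composed closures. newTestSymbolGenerator resolves names against the symbol table, newTestProductionGenerator wraps the symbol generator, and newTestLR0ItemGenerator wraps the production generator, so any failed lookup stops the test at the call site via t.Helper and t.Fatalf. The sketch below is a minimal, self-contained illustration of that composition; symbolTable, production, newSymGen, and newProdGen are simplified stand-ins for illustration, not the urubu/grammar API.

package grammar_test

import "testing"

// Simplified stand-in types; the real tests use urubu/grammar/symbol.
type symbolTable map[string]int

type production struct {
	lhs int
	rhs []int
}

// newSymGen returns a closure that resolves a symbol name or fails the test,
// so table bodies can write genSym("expr") without per-call error handling.
func newSymGen(t *testing.T, tab symbolTable) func(string) int {
	return func(text string) int {
		t.Helper()
		sym, ok := tab[text]
		if !ok {
			t.Fatalf("symbol was not found: %v", text)
		}
		return sym
	}
}

// newProdGen builds on the symbol generator; each generator wraps the one
// below it, which is how genSym, genProd, and genLR0 compose in the tests.
func newProdGen(t *testing.T, genSym func(string) int) func(string, ...string) *production {
	return func(lhs string, rhs ...string) *production {
		t.Helper()
		p := &production{lhs: genSym(lhs)}
		for _, text := range rhs {
			p.rhs = append(p.rhs, genSym(text))
		}
		return p
	}
}

func TestHelperSketch(t *testing.T) {
	genSym := newSymGen(t, symbolTable{"s": 0, "l": 1, "eq": 2, "r": 3})
	genProd := newProdGen(t, genSym)
	p := genProd("s", "l", "eq", "r")
	if len(p.rhs) != 3 {
		t.Fatalf("unexpected rhs length: %v", len(p.rhs))
	}
}

The payoff of this design is that a table of expected states can be written almost declaratively, with every lookup error reported against the table entry that caused it rather than deep inside a helper.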
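The TestLexer_Run table moved above also follows a recognizable shape: each entry drives the lexer until EOF or the first error, compares every token against the expected list, and only afterwards asserts on the expected error cause. The following is a minimal runnable sketch of that loop, assuming a trivial stand-in lexer; token, newLexer, and next here are hypothetical simplifications, not the real spec-grammar lexer.

package parser_test

import (
	"strings"
	"testing"
)

// token and lexer are deliberately trivial stand-ins: the lexer just splits
// on whitespace and reports EOF, which is enough to show the loop shape.
type token struct {
	text string
	eof  bool
}

type lexer struct {
	toks []string
	pos  int
}

func newLexer(src string) *lexer {
	return &lexer{toks: strings.Fields(src)}
}

func (l *lexer) next() (*token, error) {
	if l.pos >= len(l.toks) {
		return &token{eof: true}, nil
	}
	tok := &token{text: l.toks[l.pos]}
	l.pos++
	return tok, nil
}

func TestLexerLoopSketch(t *testing.T) {
	tests := []struct {
		caption string
		src     string
		tokens  []*token
	}{
		{
			caption: "tokens are compared in order until EOF",
			src:     "foo bar",
			tokens:  []*token{{text: "foo"}, {text: "bar"}, {eof: true}},
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			l := newLexer(tt.src)
			n := 0
			for {
				// Stop on the first error, as the real loop does, and
				// compare each produced token against the expected list.
				tok, err := l.next()
				if err != nil {
					break
				}
				if tok.text != tt.tokens[n].text || tok.eof != tt.tokens[n].eof {
					t.Fatalf("unexpected token; want: %+v, got: %+v", tt.tokens[n], tok)
				}
				n++
				if tok.eof {
					break
				}
			}
		})
	}
}

Deferring the error assertion until after the token loop, as the real test does, lets a single table express both happy-path cases (tokens, no error) and failure cases (an expected error with no token list).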