From 27b4729bd1a57740ea68e774d58d9cb3f45c5589 Mon Sep 17 00:00:00 2001 From: EuAndreh Date: Wed, 11 Dec 2024 16:48:12 -0300 Subject: Do the same single file consolidation on tests --- tests/unit/spec/grammar/parser.go | 1439 +++++++++++++++++++++++++ tests/unit/spec/grammar/parser/lexer_test.go | 236 ---- tests/unit/spec/grammar/parser/parser_test.go | 1211 --------------------- tests/unit/spec/test.go | 411 +++++++ tests/unit/spec/test/parser_test.go | 411 ------- 5 files changed, 1850 insertions(+), 1858 deletions(-) create mode 100644 tests/unit/spec/grammar/parser.go delete mode 100644 tests/unit/spec/grammar/parser/lexer_test.go delete mode 100644 tests/unit/spec/grammar/parser/parser_test.go create mode 100644 tests/unit/spec/test.go delete mode 100644 tests/unit/spec/test/parser_test.go (limited to 'tests/unit/spec') diff --git a/tests/unit/spec/grammar/parser.go b/tests/unit/spec/grammar/parser.go new file mode 100644 index 0000000..773c466 --- /dev/null +++ b/tests/unit/spec/grammar/parser.go @@ -0,0 +1,1439 @@ +package parser + +import ( + "strings" + "testing" + + verr "urubu/error" +) + +func TestLexer_Run(t *testing.T) { + idTok := func(text string) *token { + return newIDToken(text, newPosition(1, 0)) + } + + termPatTok := func(text string) *token { + return newTerminalPatternToken(text, newPosition(1, 0)) + } + + strTok := func(text string) *token { + return newStringLiteralToken(text, newPosition(1, 0)) + } + + symTok := func(kind tokenKind) *token { + return newSymbolToken(kind, newPosition(1, 0)) + } + + invalidTok := func(text string) *token { + return newInvalidToken(text, newPosition(1, 0)) + } + + tests := []struct { + caption string + src string + tokens []*token + err error + }{ + { + caption: "the lexer can recognize all kinds of tokens", + src: `id"terminal"'string':|;@...#$()`, + tokens: []*token{ + idTok("id"), + termPatTok("terminal"), + strTok(`string`), + symTok(tokenKindColon), + symTok(tokenKindOr), + symTok(tokenKindSemicolon), + 
symTok(tokenKindLabelMarker), + symTok(tokenKindExpantion), + symTok(tokenKindDirectiveMarker), + symTok(tokenKindOrderedSymbolMarker), + symTok(tokenKindLParen), + symTok(tokenKindRParen), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize keywords", + src: `fragment`, + tokens: []*token{ + symTok(tokenKindKWFragment), + newEOFToken(), + }, + }, + { + caption: "the lexer can recognize character sequences and escape sequences in a terminal", + src: `"abc\"\\"`, + tokens: []*token{ + termPatTok(`abc"\\`), + newEOFToken(), + }, + }, + { + caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", + src: `'\\\'`, + tokens: []*token{ + strTok(`\\\`), + newEOFToken(), + }, + }, + { + caption: "a pattern must include at least one character", + src: `""`, + err: synErrEmptyPattern, + }, + { + caption: "a string must include at least one character", + src: `''`, + err: synErrEmptyString, + }, + { + caption: "the lexer can recognize newlines and combine consecutive newlines into one", + src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", + tokens: []*token{ + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "the lexer ignores line comments", + src: ` +// This is the first comment. +foo +// This is the second comment. +// This is the third comment. +bar // This is the fourth comment. 
+`, + tokens: []*token{ + symTok(tokenKindNewline), + idTok("foo"), + symTok(tokenKindNewline), + idTok("bar"), + symTok(tokenKindNewline), + newEOFToken(), + }, + }, + { + caption: "an identifier cannot contain the capital-case letters", + src: `Abc`, + err: synErrIDInvalidChar, + }, + { + caption: "an identifier cannot contain the capital-case letters", + src: `Zyx`, + err: synErrIDInvalidChar, + }, + { + caption: "the underscore cannot be placed at the beginning of an identifier", + src: `_abc`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed at the end of an identifier", + src: `abc_`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed consecutively", + src: `a__b`, + err: synErrIDConsecutiveUnderscores, + }, + { + caption: "the digits cannot be placed at the biginning of an identifier", + src: `0abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "the digits cannot be placed at the biginning of an identifier", + src: `9abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "an unclosed terminal is not a valid token", + src: `"abc`, + err: synErrUnclosedTerminal, + }, + { + caption: "an incompleted escape sequence in a pattern is not a valid token", + src: `"\`, + err: synErrIncompletedEscSeq, + }, + { + caption: "an unclosed string is not a valid token", + src: `'abc`, + err: synErrUnclosedString, + }, + { + caption: "the lexer can recognize valid tokens following an invalid token", + src: `abc!!!def`, + tokens: []*token{ + idTok("abc"), + invalidTok("!!!"), + idTok("def"), + newEOFToken(), + }, + }, + { + caption: "the lexer skips white spaces", + // \u0009: HT + // \u0020: SP + src: "a\u0009b\u0020c", + tokens: []*token{ + idTok("a"), + idTok("b"), + idTok("c"), + newEOFToken(), + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + l, err := newLexer(strings.NewReader(tt.src)) + if err != nil { + t.Fatal(err) + } + n := 0 + for { + var 
tok *token + tok, err = l.next() + if err != nil { + break + } + testToken(t, tok, tt.tokens[n]) + n++ + if tok.kind == tokenKindEOF { + break + } + } + if tt.err != nil { + synErr, ok := err.(*verr.SpecError) + if !ok { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + if tt.err != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) + } + } else { + if err != nil { + t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) + } + } + }) + } +} + +func testToken(t *testing.T, tok, expected *token) { + t.Helper() + if tok.kind != expected.kind || tok.text != expected.text { + t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) + } +} + +func TestParse(t *testing.T) { + name := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "name", + Parameters: []*ParameterNode{param}, + } + } + prec := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "prec", + Parameters: []*ParameterNode{param}, + } + } + leftAssoc := func(params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "left", + Parameters: params, + } + } + rightAssoc := func(params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "right", + Parameters: params, + } + } + assign := func(params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "assign", + Parameters: params, + } + } + prod := func(lhs string, alts ...*AlternativeNode) *ProductionNode { + return &ProductionNode{ + LHS: lhs, + RHS: alts, + } + } + withProdPos := func(prod *ProductionNode, pos Position) *ProductionNode { + prod.Pos = pos + return prod + } + withProdDir := func(prod *ProductionNode, dirs ...*DirectiveNode) *ProductionNode { + prod.Directives = dirs + return prod + } + alt := func(elems ...*ElementNode) *AlternativeNode { + return &AlternativeNode{ + Elements: elems, + } + } + withAltPos := func(alt *AlternativeNode, pos Position) *AlternativeNode { + alt.Pos = 
pos + return alt + } + withAltDir := func(alt *AlternativeNode, dirs ...*DirectiveNode) *AlternativeNode { + alt.Directives = dirs + return alt + } + dir := func(name string, params ...*ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: name, + Parameters: params, + } + } + withDirPos := func(dir *DirectiveNode, pos Position) *DirectiveNode { + dir.Pos = pos + return dir + } + idParam := func(id string) *ParameterNode { + return &ParameterNode{ + ID: id, + } + } + ordSymParam := func(id string) *ParameterNode { + return &ParameterNode{ + OrderedSymbol: id, + } + } + exp := func(param *ParameterNode) *ParameterNode { + param.Expansion = true + return param + } + group := func(dirs ...*DirectiveNode) *ParameterNode { + return &ParameterNode{ + Group: dirs, + } + } + withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { + param.Pos = pos + return param + } + id := func(id string) *ElementNode { + return &ElementNode{ + ID: id, + } + } + pat := func(p string) *ElementNode { + return &ElementNode{ + Pattern: p, + } + } + label := func(name string) *LabelNode { + return &LabelNode{ + Name: name, + } + } + withLabelPos := func(label *LabelNode, pos Position) *LabelNode { + label.Pos = pos + return label + } + withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode { + elem.Label = label + return elem + } + withElemPos := func(elem *ElementNode, pos Position) *ElementNode { + elem.Pos = pos + return elem + } + frag := func(lhs string, rhs string) *FragmentNode { + return &FragmentNode{ + LHS: lhs, + RHS: rhs, + } + } + withFragmentPos := func(frag *FragmentNode, pos Position) *FragmentNode { + frag.Pos = pos + return frag + } + newPos := func(row int) Position { + return Position{ + Row: row, + Col: 0, + } + } + + tests := []struct { + caption string + src string + checkPosition bool + ast *RootNode + synErr *SyntaxError + }{ + { + caption: "a grammar can contain top-level directives", + src: ` +#name test; + +#prec ( + #left a 
b $x1 + #right c d $x2 + #assign e f $x3 +); +`, + ast: &RootNode{ + Directives: []*DirectiveNode{ + withDirPos( + name( + withParamPos( + idParam("test"), + newPos(2), + ), + ), + newPos(2), + ), + withDirPos( + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos( + idParam("a"), + newPos(5), + ), + withParamPos( + idParam("b"), + newPos(5), + ), + withParamPos( + ordSymParam("x1"), + newPos(5), + ), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos( + idParam("c"), + newPos(6), + ), + withParamPos( + idParam("d"), + newPos(6), + ), + withParamPos( + ordSymParam("x2"), + newPos(6), + ), + ), + newPos(6), + ), + withDirPos( + assign( + withParamPos( + idParam("e"), + newPos(7), + ), + withParamPos( + idParam("f"), + newPos(7), + ), + withParamPos( + ordSymParam("x3"), + newPos(7), + ), + ), + newPos(7), + ), + ), + newPos(4), + ), + ), + newPos(4), + ), + }, + }, + }, + { + caption: "a top-level directive must be followed by ';'", + src: ` +#name test +`, + synErr: synErrTopLevelDirNoSemicolon, + }, + { + caption: "a directive group must be closed by ')'", + src: ` +#prec ( + #left a b +; +`, + synErr: synErrUnclosedDirGroup, + }, + { + caption: "an ordered symbol marker '$' must be followed by and ID", + src: ` +#prec ( + #assign $ +); +`, + synErr: synErrNoOrderedSymbolName, + }, + { + caption: "single production is a valid grammar", + src: `a: "a";`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + prod("a", alt(pat("a"))), + }, + }, + }, + { + caption: "multiple productions are a valid grammar", + src: ` +e + : e add t + | e sub t + | t + ; +t + : t mul f + | t div f + | f + ; +f + : l_paren e r_paren + | id + ; + +add + : '+'; +sub + : '-'; +mul + : '*'; +div + : '/'; +l_paren + : '('; +r_paren + : ')'; +id + : "[A-Za-z_][0-9A-Za-z_]*"; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("e", + alt(id("e"), id("add"), id("t")), + alt(id("e"), id("sub"), id("t")), + alt(id("t")), + ), + prod("t", + 
alt(id("t"), id("mul"), id("f")), + alt(id("t"), id("div"), id("f")), + alt(id("f")), + ), + prod("f", + alt(id("l_paren"), id("e"), id("r_paren")), + alt(id("id")), + ), + }, + LexProductions: []*ProductionNode{ + prod("add", alt(pat(`+`))), + prod("sub", alt(pat(`-`))), + prod("mul", alt(pat(`*`))), + prod("div", alt(pat(`/`))), + prod("l_paren", alt(pat(`(`))), + prod("r_paren", alt(pat(`)`))), + prod("id", alt(pat(`[A-Za-z_][0-9A-Za-z_]*`))), + }, + }, + }, + { + caption: "productions can contain the empty alternative", + src: ` +a + : foo + | + ; +b + : + | bar + ; +c + : + ; + +foo + : 'foo'; +bar + : 'bar'; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("a", + alt(id("foo")), + alt(), + ), + prod("b", + alt(), + alt(id("bar")), + ), + prod("c", + alt(), + ), + }, + LexProductions: []*ProductionNode{ + prod("foo", alt(pat(`foo`))), + prod("bar", alt(pat(`bar`))), + }, + }, + }, + { + caption: "a production cannot contain an ordered symbol", + src: ` +a: $x; +`, + synErr: synErrNoSemicolon, + }, + { + caption: "an alternative cannot contain a pattern directly", + src: ` +s + : "foo" bar + ; + +bar + : "bar"; +`, + synErr: synErrPatternInAlt, + }, + { + caption: "an alternative cannot contain a string directly", + src: ` +s + : 'foo' bar + ; +bar + : "bar"; +`, + synErr: synErrPatternInAlt, + }, + { + caption: "a terminal symbol can be defined using a string literal", + src: ` +foo + : 'foo'; +`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + prod("foo", + alt(pat(`foo`)), + ), + }, + }, + }, + { + caption: "a terminal symbol can be defined using a pattern", + src: ` +foo + : "foo"; +`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + prod("foo", + alt(pat(`foo`)), + ), + }, + }, + }, + { + caption: "`fragment` is a reserved word", + src: `fragment: 'fragment';`, + synErr: synErrNoProductionName, + }, + { + caption: "when a source contains an unknown token, the parser raises a syntax error", + src: `a: !;`, + synErr: 
synErrInvalidToken, + }, + { + caption: "a production must have its name as the first element", + src: `: "a";`, + synErr: synErrNoProductionName, + }, + { + caption: "':' must precede an alternative", + src: `a "a";`, + synErr: synErrNoColon, + }, + { + caption: "';' must follow a production", + src: `a: "a"`, + synErr: synErrNoSemicolon, + }, + { + caption: "';' can only appear at the end of a production", + src: `;`, + synErr: synErrNoProductionName, + }, + { + caption: "a grammar can contain fragments", + src: ` +s + : tagline + ; +tagline: "\f{words} IS OUT THERE."; +fragment words: "[A-Za-z\u{0020}]+"; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("s", + alt(id("tagline")), + ), + }, + LexProductions: []*ProductionNode{ + prod("tagline", + alt(pat(`\f{words} IS OUT THERE.`)), + ), + }, + Fragments: []*FragmentNode{ + frag("words", `[A-Za-z\u{0020}]+`), + }, + }, + }, + { + caption: "the lexer treats consecutive lines as a single token but can count lines correctly", + src: `// This line precedes line comments and blank lines. +// This is a line comment. + + +s + : foo + ; + + +// This line is sandwiched between blank lines. 
+ + +foo: 'foo'; +`, + checkPosition: true, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("s", + withAltPos( + alt( + withElemPos( + id("foo"), + newPos(6), + ), + ), + newPos(6), + ), + ), + newPos(5), + ), + }, + LexProductions: []*ProductionNode{ + withProdPos( + prod("foo", + withAltPos( + alt( + withElemPos( + pat(`foo`), + newPos(13), + ), + ), + newPos(13), + ), + ), + newPos(13), + ), + }, + }, + }, + { + caption: "a grammar can contain production directives and alternative directives", + src: ` +mode_tran_seq + : mode_tran_seq mode_tran + | mode_tran + ; +mode_tran + : push_m1 + | push_m2 + | pop_m1 + | pop_m2 + ; + +push_m1 #push m1 + : "->"; +push_m2 #mode m1 #push m2 + : "-->"; +pop_m1 #mode m1 #pop + : "<-"; +pop_m2 #mode m2 #pop + : "<--"; +whitespace #mode default m1 m2 #skip + : "\u{0020}+"; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("mode_tran_seq", + alt(id("mode_tran_seq"), id("mode_tran")), + alt(id("mode_tran")), + ), + prod("mode_tran", + alt(id("push_m1")), + alt(id("push_m2")), + alt(id("pop_m1")), + alt(id("pop_m2")), + ), + }, + LexProductions: []*ProductionNode{ + withProdDir( + prod("push_m1", + alt(pat(`->`)), + ), + dir("push", idParam("m1")), + ), + withProdDir( + prod("push_m2", + alt(pat(`-->`)), + ), + dir("mode", idParam("m1")), + dir("push", idParam("m2")), + ), + withProdDir( + prod("pop_m1", + alt(pat(`<-`)), + ), + dir("mode", idParam("m1")), + dir("pop"), + ), + withProdDir( + prod("pop_m2", + alt(pat(`<--`)), + ), + dir("mode", idParam("m2")), + dir("pop"), + ), + withProdDir( + prod("whitespace", + alt(pat(`\u{0020}+`)), + ), + dir("mode", idParam("default"), idParam("m1"), idParam("m2")), + dir("skip"), + ), + }, + }, + }, + { + caption: "an alternative of a production can have multiple alternative directives", + src: ` +s + : foo bar #prec baz #ast foo bar + ; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("s", + withAltDir( + alt(id("foo"), id("bar")), + 
dir("prec", idParam("baz")), + dir("ast", idParam("foo"), idParam("bar")), + ), + ), + }, + }, + }, + { + caption: "a lexical production can have multiple production directives", + src: ` +foo #mode a #push b + : 'foo'; +`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + withProdDir( + prod("foo", + alt(pat("foo")), + ), + dir("mode", idParam("a")), + dir("push", idParam("b")), + ), + }, + }, + }, + { + caption: "a production must be followed by a newline", + src: ` +s: foo; foo: "foo"; +`, + synErr: synErrSemicolonNoNewline, + }, + { + caption: "a grammar can contain 'ast' directives and expansion operator", + src: ` +s + : foo bar_list #ast foo bar_list + ; +bar_list + : bar_list bar #ast bar_list... bar + | bar #ast bar + ; +foo: "foo"; +bar: "bar"; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("s", + withAltDir( + alt(id("foo"), id("bar_list")), + dir("ast", idParam("foo"), idParam("bar_list")), + ), + ), + prod("bar_list", + withAltDir( + alt(id("bar_list"), id("bar")), + dir("ast", exp(idParam("bar_list")), idParam("bar")), + ), + withAltDir( + alt(id("bar")), + dir("ast", idParam("bar")), + ), + ), + }, + LexProductions: []*ProductionNode{ + prod("foo", + alt(pat("foo")), + ), + prod("bar", + alt(pat("bar")), + ), + }, + }, + }, + { + caption: "an expansion operator must be preceded by an identifier", + src: ` +s + : foo #ast ... + ; +`, + synErr: synErrStrayExpOp, + }, + { + caption: "an expansion operator must be preceded by an identifier", + src: ` +a + : foo #ast ... foo + ; +`, + synErr: synErrStrayExpOp, + }, + { + caption: "an expansion operator cannot be applied to a pattern", + src: ` +a + : "foo" #ast "foo"... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an expansion operator cannot be applied to a string", + src: ` +a + : 'foo' #ast 'foo'... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an expansion operator cannot be applied to an ordered symbol", + src: ` +a + : foo #ast $foo... 
+ ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an expansion operator cannot be applied to a directive group", + src: ` +a + : foo #ast ()... + ; +`, + synErr: synErrInvalidExpOperand, + }, + { + caption: "an AST has node positions", + src: ` +exp + : exp add id #ast exp id + | id + ; + +whitespace #skip + : "\u{0020}+"; +add + : '+'; +id + : "\f{letter}(\f{letter}|\f{number})*"; +fragment letter + : "[A-Za-z_]"; +fragment number + : "[0-9]"; +`, + checkPosition: true, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("exp", + withAltPos( + withAltDir( + alt( + withElemPos(id("exp"), newPos(3)), + withElemPos(id("add"), newPos(3)), + withElemPos(id("id"), newPos(3)), + ), + withDirPos( + dir("ast", + withParamPos(idParam("exp"), newPos(3)), + withParamPos(idParam("id"), newPos(3)), + ), + newPos(3), + ), + ), + newPos(3), + ), + withAltPos( + alt( + withElemPos(id("id"), newPos(4)), + ), + newPos(4), + ), + ), + newPos(2), + ), + }, + LexProductions: []*ProductionNode{ + withProdPos( + withProdDir( + prod("whitespace", + withAltPos( + alt( + withElemPos( + pat(`\u{0020}+`), + newPos(8), + ), + ), + newPos(8), + ), + ), + withDirPos( + dir("skip"), + newPos(7), + ), + ), + newPos(7), + ), + withProdPos( + prod("add", + withAltPos( + alt( + withElemPos( + pat(`+`), + newPos(10), + ), + ), + newPos(10), + ), + ), + newPos(9), + ), + withProdPos( + prod("id", + withAltPos( + alt( + withElemPos( + pat(`\f{letter}(\f{letter}|\f{number})*`), + newPos(12), + ), + ), + newPos(12), + ), + ), + newPos(11), + ), + }, + Fragments: []*FragmentNode{ + withFragmentPos( + frag("letter", "[A-Za-z_]"), + newPos(13), + ), + withFragmentPos( + frag("number", "[0-9]"), + newPos(15), + ), + }, + }, + }, + { + caption: "a symbol can have a label", + src: ` +expr + : term@lhs add term@rhs + ; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("expr", + withAltPos( + alt( + withElemPos( + withLabel( + id("term"), + 
withLabelPos( + label("lhs"), + newPos(3), + ), + ), + newPos(3), + ), + withElemPos( + id("add"), + newPos(3), + ), + withElemPos( + withLabel( + id("term"), + withLabelPos( + label("rhs"), + newPos(3), + ), + ), + newPos(3), + ), + ), + newPos(3), + ), + ), + newPos(2), + ), + }, + }, + }, + { + caption: "a label must be an identifier, not a string", + src: ` +foo + : bar@'baz' + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "a label must be an identifier, not a pattern", + src: ` +foo + : bar@"baz" + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "the symbol marker @ must be followed by an identifier", + src: ` +foo + : bar@ + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "a symbol cannot have more than or equal to two labels", + src: ` +foo + : bar@baz@bra + ; +`, + synErr: synErrLabelWithNoSymbol, + }, + { + caption: "a label must follow a symbol", + src: ` +foo + : @baz + ; +`, + synErr: synErrLabelWithNoSymbol, + }, + { + caption: "a grammar can contain left and right associativities", + src: ` +#prec ( + #left l1 l2 + #left l3 + #right r1 r2 + #right r3 +); + +s + : id l1 id l2 id l3 id + | id r1 id r2 id r3 id + ; + +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +l1 + : 'l1'; +l2 + : 'l2'; +l3 + : 'l3'; +r1 + : 'r1'; +r2 + : 'r2'; +r3 + : 'r3'; +id + : "[A-Za-z0-9_]+"; +`, + ast: &RootNode{ + Directives: []*DirectiveNode{ + withDirPos( + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos(idParam("l1"), newPos(3)), + withParamPos(idParam("l2"), newPos(3)), + ), + newPos(3), + ), + withDirPos( + leftAssoc( + withParamPos(idParam("l3"), newPos(4)), + ), + newPos(4), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r1"), newPos(5)), + withParamPos(idParam("r2"), newPos(5)), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r3"), newPos(6)), + ), + newPos(6), + ), + ), + newPos(2), + ), + ), + newPos(2), + ), + }, + Productions: []*ProductionNode{ + prod("s", + alt(id(`id`), id(`l1`), id(`id`), 
id(`l2`), id(`id`), id(`l3`), id(`id`)), + alt(id(`id`), id(`r1`), id(`id`), id(`r2`), id(`id`), id(`r3`), id(`id`)), + ), + }, + LexProductions: []*ProductionNode{ + withProdDir( + prod("whitespaces", + alt(pat(`[\u{0009}\u{0020}]+`)), + ), + dir("skip"), + ), + prod("l1", alt(pat(`l1`))), + prod("l2", alt(pat(`l2`))), + prod("l3", alt(pat(`l3`))), + prod("r1", alt(pat(`r1`))), + prod("r2", alt(pat(`r2`))), + prod("r3", alt(pat(`r3`))), + prod("id", alt(pat(`[A-Za-z0-9_]+`))), + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.caption, func(t *testing.T) { + ast, err := Parse(strings.NewReader(tt.src)) + if tt.synErr != nil { + synErrs, ok := err.(verr.SpecErrors) + if !ok { + t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err) + } + synErr := synErrs[0] + if tt.synErr != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause) + } + if ast != nil { + t.Fatalf("AST must be nil") + } + } else { + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if ast == nil { + t.Fatalf("AST must be non-nil") + } + testRootNode(t, ast, tt.ast, tt.checkPosition) + } + }) + } +} + +func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { + t.Helper() + if len(root.Productions) != len(expected.Productions) { + t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) + } + if len(root.Directives) != len(expected.Directives) { + t.Fatalf("unexpected length of top-level directives; want: %v, got: %v", len(expected.Directives), len(root.Directives)) + } + for i, dir := range root.Directives { + testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true) + } + for i, prod := range root.Productions { + testProductionNode(t, prod, expected.Productions[i], checkPosition) + } + for i, prod := range root.LexProductions { + testProductionNode(t, prod, expected.LexProductions[i], checkPosition) + } + for i, frag := range 
root.Fragments { + testFragmentNode(t, frag, expected.Fragments[i], checkPosition) + } +} + +func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) { + t.Helper() + if len(expected.Directives) != len(prod.Directives) { + t.Fatalf("unexpected directive count; want: %v directives, got: %v directives", len(expected.Directives), len(prod.Directives)) + } + if len(expected.Directives) > 0 { + testDirectives(t, prod.Directives, expected.Directives, checkPosition) + } + if prod.LHS != expected.LHS { + t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS) + } + if len(prod.RHS) != len(expected.RHS) { + t.Fatalf("unexpected length of an RHS; want: %v, got: %v", len(expected.RHS), len(prod.RHS)) + } + for i, alt := range prod.RHS { + testAlternativeNode(t, alt, expected.RHS[i], checkPosition) + } + if checkPosition { + testPosition(t, prod.Pos, expected.Pos) + } +} + +func testFragmentNode(t *testing.T, frag, expected *FragmentNode, checkPosition bool) { + t.Helper() + if frag.LHS != expected.LHS { + t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, frag.LHS) + } + if frag.RHS != expected.RHS { + t.Fatalf("unexpected RHS; want: %v, got: %v", expected.RHS, frag.RHS) + } + if checkPosition { + testPosition(t, frag.Pos, expected.Pos) + } +} + +func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosition bool) { + t.Helper() + if len(alt.Elements) != len(expected.Elements) { + t.Fatalf("unexpected length of elements; want: %v, got: %v", len(expected.Elements), len(alt.Elements)) + } + for i, elem := range alt.Elements { + testElementNode(t, elem, expected.Elements[i], checkPosition) + } + if len(alt.Directives) != len(expected.Directives) { + t.Fatalf("unexpected alternative directive count; want: %v directive, got: %v directive", len(expected.Directives), len(alt.Directives)) + } + if len(alt.Directives) > 0 { + testDirectives(t, alt.Directives, expected.Directives, checkPosition) + } + if 
checkPosition { + testPosition(t, alt.Pos, expected.Pos) + } +} + +func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bool) { + t.Helper() + if elem.ID != expected.ID { + t.Fatalf("unexpected ID; want: %v, got: %v", expected.ID, elem.ID) + } + if elem.Pattern != expected.Pattern { + t.Fatalf("unexpected pattern; want: %v, got: %v", expected.Pattern, elem.Pattern) + } + if checkPosition { + testPosition(t, elem.Pos, expected.Pos) + } +} + +func testDirectives(t *testing.T, dirs, expected []*DirectiveNode, checkPosition bool) { + t.Helper() + for i, exp := range expected { + dir := dirs[i] + + if exp.Name != dir.Name { + t.Fatalf("unexpected directive name; want: %+v, got: %+v", exp.Name, dir.Name) + } + if len(exp.Parameters) != len(dir.Parameters) { + t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", exp.Parameters, dir.Parameters) + } + for j, expParam := range exp.Parameters { + testParameter(t, dir.Parameters[j], expParam, checkPosition) + } + if checkPosition { + testPosition(t, dir.Pos, exp.Pos) + } + } +} + +func testParameter(t *testing.T, param, expected *ParameterNode, checkPosition bool) { + t.Helper() + if param.ID != expected.ID { + t.Fatalf("unexpected ID parameter; want: %v, got: %v", expected.ID, param.ID) + } + if param.String != expected.String { + t.Fatalf("unexpected string parameter; want: %v, got: %v", expected.String, param.String) // print the String fields this check compares, not ID + } + if param.Expansion != expected.Expansion { + t.Fatalf("unexpected expansion; want: %v, got: %v", expected.Expansion, param.Expansion) + } + if checkPosition { + testPosition(t, param.Pos, expected.Pos) + } +} + +func testPosition(t *testing.T, pos, expected Position) { + t.Helper() + if pos.Row != expected.Row { + t.Fatalf("unexpected position want: %+v, got: %+v", expected, pos) + } +} diff --git a/tests/unit/spec/grammar/parser/lexer_test.go b/tests/unit/spec/grammar/parser/lexer_test.go deleted file mode 100644 index c402b42..0000000 ---
a/tests/unit/spec/grammar/parser/lexer_test.go +++ /dev/null @@ -1,236 +0,0 @@ -package parser - -import ( - "strings" - "testing" - - verr "urubu/error" -) - -func TestLexer_Run(t *testing.T) { - idTok := func(text string) *token { - return newIDToken(text, newPosition(1, 0)) - } - - termPatTok := func(text string) *token { - return newTerminalPatternToken(text, newPosition(1, 0)) - } - - strTok := func(text string) *token { - return newStringLiteralToken(text, newPosition(1, 0)) - } - - symTok := func(kind tokenKind) *token { - return newSymbolToken(kind, newPosition(1, 0)) - } - - invalidTok := func(text string) *token { - return newInvalidToken(text, newPosition(1, 0)) - } - - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "the lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;@...#$()`, - tokens: []*token{ - idTok("id"), - termPatTok("terminal"), - strTok(`string`), - symTok(tokenKindColon), - symTok(tokenKindOr), - symTok(tokenKindSemicolon), - symTok(tokenKindLabelMarker), - symTok(tokenKindExpantion), - symTok(tokenKindDirectiveMarker), - symTok(tokenKindOrderedSymbolMarker), - symTok(tokenKindLParen), - symTok(tokenKindRParen), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize keywords", - src: `fragment`, - tokens: []*token{ - symTok(tokenKindKWFragment), - newEOFToken(), - }, - }, - { - caption: "the lexer can recognize character sequences and escape sequences in a terminal", - src: `"abc\"\\"`, - tokens: []*token{ - termPatTok(`abc"\\`), - newEOFToken(), - }, - }, - { - caption: "backslashes are recognized as they are because escape sequences are not allowed in strings", - src: `'\\\'`, - tokens: []*token{ - strTok(`\\\`), - newEOFToken(), - }, - }, - { - caption: "a pattern must include at least one character", - src: `""`, - err: synErrEmptyPattern, - }, - { - caption: "a string must include at least one character", - src: `''`, - err: synErrEmptyString, - }, 
- { - caption: "the lexer can recognize newlines and combine consecutive newlines into one", - src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", - tokens: []*token{ - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - symTok(tokenKindOr), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "the lexer ignores line comments", - src: ` -// This is the first comment. -foo -// This is the second comment. -// This is the third comment. -bar // This is the fourth comment. -`, - tokens: []*token{ - symTok(tokenKindNewline), - idTok("foo"), - symTok(tokenKindNewline), - idTok("bar"), - symTok(tokenKindNewline), - newEOFToken(), - }, - }, - { - caption: "an identifier cannot contain the capital-case letters", - src: `Abc`, - err: synErrIDInvalidChar, - }, - { - caption: "an identifier cannot contain the capital-case letters", - src: `Zyx`, - err: synErrIDInvalidChar, - }, - { - caption: "the underscore cannot be placed at the beginning of an identifier", - src: `_abc`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed at the end of an identifier", - src: `abc_`, - err: synErrIDInvalidUnderscorePos, - }, - { - caption: "the underscore cannot be placed consecutively", - src: `a__b`, - err: synErrIDConsecutiveUnderscores, - }, - { - caption: "the digits cannot be placed at the biginning of an identifier", - src: `0abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "the digits cannot be placed at the biginning of an identifier", - src: `9abc`, - err: synErrIDInvalidDigitsPos, - }, - { - caption: "an unclosed terminal is not a valid token", - src: `"abc`, - err: synErrUnclosedTerminal, - }, - { - caption: "an incompleted escape sequence in a pattern is not a valid token", - src: `"\`, - err: synErrIncompletedEscSeq, - }, - { - caption: "an unclosed string is not a valid token", - src: `'abc`, - 
err: synErrUnclosedString, - }, - { - caption: "the lexer can recognize valid tokens following an invalid token", - src: `abc!!!def`, - tokens: []*token{ - idTok("abc"), - invalidTok("!!!"), - idTok("def"), - newEOFToken(), - }, - }, - { - caption: "the lexer skips white spaces", - // \u0009: HT - // \u0020: SP - src: "a\u0009b\u0020c", - tokens: []*token{ - idTok("a"), - idTok("b"), - idTok("c"), - newEOFToken(), - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - l, err := newLexer(strings.NewReader(tt.src)) - if err != nil { - t.Fatal(err) - } - n := 0 - for { - var tok *token - tok, err = l.next() - if err != nil { - break - } - testToken(t, tok, tt.tokens[n]) - n++ - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - synErr, ok := err.(*verr.SpecError) - if !ok { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - if tt.err != synErr.Cause { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause) - } - } else { - if err != nil { - t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err) - } - } - }) - } -} - -func testToken(t *testing.T, tok, expected *token) { - t.Helper() - if tok.kind != expected.kind || tok.text != expected.text { - t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok) - } -} diff --git a/tests/unit/spec/grammar/parser/parser_test.go b/tests/unit/spec/grammar/parser/parser_test.go deleted file mode 100644 index 4161f6b..0000000 --- a/tests/unit/spec/grammar/parser/parser_test.go +++ /dev/null @@ -1,1211 +0,0 @@ -package parser - -import ( - "strings" - "testing" - - verr "urubu/error" -) - -func TestParse(t *testing.T) { - name := func(param *ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "name", - Parameters: []*ParameterNode{param}, - } - } - prec := func(param *ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "prec", - Parameters: []*ParameterNode{param}, - } - } - leftAssoc := func(params 
...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "left", - Parameters: params, - } - } - rightAssoc := func(params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "right", - Parameters: params, - } - } - assign := func(params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: "assign", - Parameters: params, - } - } - prod := func(lhs string, alts ...*AlternativeNode) *ProductionNode { - return &ProductionNode{ - LHS: lhs, - RHS: alts, - } - } - withProdPos := func(prod *ProductionNode, pos Position) *ProductionNode { - prod.Pos = pos - return prod - } - withProdDir := func(prod *ProductionNode, dirs ...*DirectiveNode) *ProductionNode { - prod.Directives = dirs - return prod - } - alt := func(elems ...*ElementNode) *AlternativeNode { - return &AlternativeNode{ - Elements: elems, - } - } - withAltPos := func(alt *AlternativeNode, pos Position) *AlternativeNode { - alt.Pos = pos - return alt - } - withAltDir := func(alt *AlternativeNode, dirs ...*DirectiveNode) *AlternativeNode { - alt.Directives = dirs - return alt - } - dir := func(name string, params ...*ParameterNode) *DirectiveNode { - return &DirectiveNode{ - Name: name, - Parameters: params, - } - } - withDirPos := func(dir *DirectiveNode, pos Position) *DirectiveNode { - dir.Pos = pos - return dir - } - idParam := func(id string) *ParameterNode { - return &ParameterNode{ - ID: id, - } - } - ordSymParam := func(id string) *ParameterNode { - return &ParameterNode{ - OrderedSymbol: id, - } - } - exp := func(param *ParameterNode) *ParameterNode { - param.Expansion = true - return param - } - group := func(dirs ...*DirectiveNode) *ParameterNode { - return &ParameterNode{ - Group: dirs, - } - } - withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { - param.Pos = pos - return param - } - id := func(id string) *ElementNode { - return &ElementNode{ - ID: id, - } - } - pat := func(p string) *ElementNode { - return &ElementNode{ - Pattern: p, 
- } - } - label := func(name string) *LabelNode { - return &LabelNode{ - Name: name, - } - } - withLabelPos := func(label *LabelNode, pos Position) *LabelNode { - label.Pos = pos - return label - } - withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode { - elem.Label = label - return elem - } - withElemPos := func(elem *ElementNode, pos Position) *ElementNode { - elem.Pos = pos - return elem - } - frag := func(lhs string, rhs string) *FragmentNode { - return &FragmentNode{ - LHS: lhs, - RHS: rhs, - } - } - withFragmentPos := func(frag *FragmentNode, pos Position) *FragmentNode { - frag.Pos = pos - return frag - } - newPos := func(row int) Position { - return Position{ - Row: row, - Col: 0, - } - } - - tests := []struct { - caption string - src string - checkPosition bool - ast *RootNode - synErr *SyntaxError - }{ - { - caption: "a grammar can contain top-level directives", - src: ` -#name test; - -#prec ( - #left a b $x1 - #right c d $x2 - #assign e f $x3 -); -`, - ast: &RootNode{ - Directives: []*DirectiveNode{ - withDirPos( - name( - withParamPos( - idParam("test"), - newPos(2), - ), - ), - newPos(2), - ), - withDirPos( - prec( - withParamPos( - group( - withDirPos( - leftAssoc( - withParamPos( - idParam("a"), - newPos(5), - ), - withParamPos( - idParam("b"), - newPos(5), - ), - withParamPos( - ordSymParam("x1"), - newPos(5), - ), - ), - newPos(5), - ), - withDirPos( - rightAssoc( - withParamPos( - idParam("c"), - newPos(6), - ), - withParamPos( - idParam("d"), - newPos(6), - ), - withParamPos( - ordSymParam("x2"), - newPos(6), - ), - ), - newPos(6), - ), - withDirPos( - assign( - withParamPos( - idParam("e"), - newPos(7), - ), - withParamPos( - idParam("f"), - newPos(7), - ), - withParamPos( - ordSymParam("x3"), - newPos(7), - ), - ), - newPos(7), - ), - ), - newPos(4), - ), - ), - newPos(4), - ), - }, - }, - }, - { - caption: "a top-level directive must be followed by ';'", - src: ` -#name test -`, - synErr: synErrTopLevelDirNoSemicolon, - }, - { 
- caption: "a directive group must be closed by ')'", - src: ` -#prec ( - #left a b -; -`, - synErr: synErrUnclosedDirGroup, - }, - { - caption: "an ordered symbol marker '$' must be followed by and ID", - src: ` -#prec ( - #assign $ -); -`, - synErr: synErrNoOrderedSymbolName, - }, - { - caption: "single production is a valid grammar", - src: `a: "a";`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - prod("a", alt(pat("a"))), - }, - }, - }, - { - caption: "multiple productions are a valid grammar", - src: ` -e - : e add t - | e sub t - | t - ; -t - : t mul f - | t div f - | f - ; -f - : l_paren e r_paren - | id - ; - -add - : '+'; -sub - : '-'; -mul - : '*'; -div - : '/'; -l_paren - : '('; -r_paren - : ')'; -id - : "[A-Za-z_][0-9A-Za-z_]*"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("e", - alt(id("e"), id("add"), id("t")), - alt(id("e"), id("sub"), id("t")), - alt(id("t")), - ), - prod("t", - alt(id("t"), id("mul"), id("f")), - alt(id("t"), id("div"), id("f")), - alt(id("f")), - ), - prod("f", - alt(id("l_paren"), id("e"), id("r_paren")), - alt(id("id")), - ), - }, - LexProductions: []*ProductionNode{ - prod("add", alt(pat(`+`))), - prod("sub", alt(pat(`-`))), - prod("mul", alt(pat(`*`))), - prod("div", alt(pat(`/`))), - prod("l_paren", alt(pat(`(`))), - prod("r_paren", alt(pat(`)`))), - prod("id", alt(pat(`[A-Za-z_][0-9A-Za-z_]*`))), - }, - }, - }, - { - caption: "productions can contain the empty alternative", - src: ` -a - : foo - | - ; -b - : - | bar - ; -c - : - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("a", - alt(id("foo")), - alt(), - ), - prod("b", - alt(), - alt(id("bar")), - ), - prod("c", - alt(), - ), - }, - LexProductions: []*ProductionNode{ - prod("foo", alt(pat(`foo`))), - prod("bar", alt(pat(`bar`))), - }, - }, - }, - { - caption: "a production cannot contain an ordered symbol", - src: ` -a: $x; -`, - synErr: synErrNoSemicolon, - }, - { - caption: "an alternative 
cannot contain a pattern directly", - src: ` -s - : "foo" bar - ; - -bar - : "bar"; -`, - synErr: synErrPatternInAlt, - }, - { - caption: "an alternative cannot contain a string directly", - src: ` -s - : 'foo' bar - ; -bar - : "bar"; -`, - synErr: synErrPatternInAlt, - }, - { - caption: "a terminal symbol can be defined using a string literal", - src: ` -foo - : 'foo'; -`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - prod("foo", - alt(pat(`foo`)), - ), - }, - }, - }, - { - caption: "a terminal symbol can be defined using a pattern", - src: ` -foo - : "foo"; -`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - prod("foo", - alt(pat(`foo`)), - ), - }, - }, - }, - { - caption: "`fragment` is a reserved word", - src: `fragment: 'fragment';`, - synErr: synErrNoProductionName, - }, - { - caption: "when a source contains an unknown token, the parser raises a syntax error", - src: `a: !;`, - synErr: synErrInvalidToken, - }, - { - caption: "a production must have its name as the first element", - src: `: "a";`, - synErr: synErrNoProductionName, - }, - { - caption: "':' must precede an alternative", - src: `a "a";`, - synErr: synErrNoColon, - }, - { - caption: "';' must follow a production", - src: `a: "a"`, - synErr: synErrNoSemicolon, - }, - { - caption: "';' can only appear at the end of a production", - src: `;`, - synErr: synErrNoProductionName, - }, - { - caption: "a grammar can contain fragments", - src: ` -s - : tagline - ; -tagline: "\f{words} IS OUT THERE."; -fragment words: "[A-Za-z\u{0020}]+"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("s", - alt(id("tagline")), - ), - }, - LexProductions: []*ProductionNode{ - prod("tagline", - alt(pat(`\f{words} IS OUT THERE.`)), - ), - }, - Fragments: []*FragmentNode{ - frag("words", `[A-Za-z\u{0020}]+`), - }, - }, - }, - { - caption: "the lexer treats consecutive lines as a single token but can count lines correctly", - src: `// This line precedes line comments and blank lines. 
-// This is a line comment. - - -s - : foo - ; - - -// This line is sandwiched between blank lines. - - -foo: 'foo'; -`, - checkPosition: true, - ast: &RootNode{ - Productions: []*ProductionNode{ - withProdPos( - prod("s", - withAltPos( - alt( - withElemPos( - id("foo"), - newPos(6), - ), - ), - newPos(6), - ), - ), - newPos(5), - ), - }, - LexProductions: []*ProductionNode{ - withProdPos( - prod("foo", - withAltPos( - alt( - withElemPos( - pat(`foo`), - newPos(13), - ), - ), - newPos(13), - ), - ), - newPos(13), - ), - }, - }, - }, - { - caption: "a grammar can contain production directives and alternative directives", - src: ` -mode_tran_seq - : mode_tran_seq mode_tran - | mode_tran - ; -mode_tran - : push_m1 - | push_m2 - | pop_m1 - | pop_m2 - ; - -push_m1 #push m1 - : "->"; -push_m2 #mode m1 #push m2 - : "-->"; -pop_m1 #mode m1 #pop - : "<-"; -pop_m2 #mode m2 #pop - : "<--"; -whitespace #mode default m1 m2 #skip - : "\u{0020}+"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("mode_tran_seq", - alt(id("mode_tran_seq"), id("mode_tran")), - alt(id("mode_tran")), - ), - prod("mode_tran", - alt(id("push_m1")), - alt(id("push_m2")), - alt(id("pop_m1")), - alt(id("pop_m2")), - ), - }, - LexProductions: []*ProductionNode{ - withProdDir( - prod("push_m1", - alt(pat(`->`)), - ), - dir("push", idParam("m1")), - ), - withProdDir( - prod("push_m2", - alt(pat(`-->`)), - ), - dir("mode", idParam("m1")), - dir("push", idParam("m2")), - ), - withProdDir( - prod("pop_m1", - alt(pat(`<-`)), - ), - dir("mode", idParam("m1")), - dir("pop"), - ), - withProdDir( - prod("pop_m2", - alt(pat(`<--`)), - ), - dir("mode", idParam("m2")), - dir("pop"), - ), - withProdDir( - prod("whitespace", - alt(pat(`\u{0020}+`)), - ), - dir("mode", idParam("default"), idParam("m1"), idParam("m2")), - dir("skip"), - ), - }, - }, - }, - { - caption: "an alternative of a production can have multiple alternative directives", - src: ` -s - : foo bar #prec baz #ast foo bar - ; -`, - ast: 
&RootNode{ - Productions: []*ProductionNode{ - prod("s", - withAltDir( - alt(id("foo"), id("bar")), - dir("prec", idParam("baz")), - dir("ast", idParam("foo"), idParam("bar")), - ), - ), - }, - }, - }, - { - caption: "a lexical production can have multiple production directives", - src: ` -foo #mode a #push b - : 'foo'; -`, - ast: &RootNode{ - LexProductions: []*ProductionNode{ - withProdDir( - prod("foo", - alt(pat("foo")), - ), - dir("mode", idParam("a")), - dir("push", idParam("b")), - ), - }, - }, - }, - { - caption: "a production must be followed by a newline", - src: ` -s: foo; foo: "foo"; -`, - synErr: synErrSemicolonNoNewline, - }, - { - caption: "a grammar can contain 'ast' directives and expansion operator", - src: ` -s - : foo bar_list #ast foo bar_list - ; -bar_list - : bar_list bar #ast bar_list... bar - | bar #ast bar - ; -foo: "foo"; -bar: "bar"; -`, - ast: &RootNode{ - Productions: []*ProductionNode{ - prod("s", - withAltDir( - alt(id("foo"), id("bar_list")), - dir("ast", idParam("foo"), idParam("bar_list")), - ), - ), - prod("bar_list", - withAltDir( - alt(id("bar_list"), id("bar")), - dir("ast", exp(idParam("bar_list")), idParam("bar")), - ), - withAltDir( - alt(id("bar")), - dir("ast", idParam("bar")), - ), - ), - }, - LexProductions: []*ProductionNode{ - prod("foo", - alt(pat("foo")), - ), - prod("bar", - alt(pat("bar")), - ), - }, - }, - }, - { - caption: "an expansion operator must be preceded by an identifier", - src: ` -s - : foo #ast ... - ; -`, - synErr: synErrStrayExpOp, - }, - { - caption: "an expansion operator must be preceded by an identifier", - src: ` -a - : foo #ast ... foo - ; -`, - synErr: synErrStrayExpOp, - }, - { - caption: "an expansion operator cannot be applied to a pattern", - src: ` -a - : "foo" #ast "foo"... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an expansion operator cannot be applied to a string", - src: ` -a - : 'foo' #ast 'foo'... 
- ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an expansion operator cannot be applied to an ordered symbol", - src: ` -a - : foo #ast $foo... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an expansion operator cannot be applied to a directive group", - src: ` -a - : foo #ast ()... - ; -`, - synErr: synErrInvalidExpOperand, - }, - { - caption: "an AST has node positions", - src: ` -exp - : exp add id #ast exp id - | id - ; - -whitespace #skip - : "\u{0020}+"; -add - : '+'; -id - : "\f{letter}(\f{letter}|\f{number})*"; -fragment letter - : "[A-Za-z_]"; -fragment number - : "[0-9]"; -`, - checkPosition: true, - ast: &RootNode{ - Productions: []*ProductionNode{ - withProdPos( - prod("exp", - withAltPos( - withAltDir( - alt( - withElemPos(id("exp"), newPos(3)), - withElemPos(id("add"), newPos(3)), - withElemPos(id("id"), newPos(3)), - ), - withDirPos( - dir("ast", - withParamPos(idParam("exp"), newPos(3)), - withParamPos(idParam("id"), newPos(3)), - ), - newPos(3), - ), - ), - newPos(3), - ), - withAltPos( - alt( - withElemPos(id("id"), newPos(4)), - ), - newPos(4), - ), - ), - newPos(2), - ), - }, - LexProductions: []*ProductionNode{ - withProdPos( - withProdDir( - prod("whitespace", - withAltPos( - alt( - withElemPos( - pat(`\u{0020}+`), - newPos(8), - ), - ), - newPos(8), - ), - ), - withDirPos( - dir("skip"), - newPos(7), - ), - ), - newPos(7), - ), - withProdPos( - prod("add", - withAltPos( - alt( - withElemPos( - pat(`+`), - newPos(10), - ), - ), - newPos(10), - ), - ), - newPos(9), - ), - withProdPos( - prod("id", - withAltPos( - alt( - withElemPos( - pat(`\f{letter}(\f{letter}|\f{number})*`), - newPos(12), - ), - ), - newPos(12), - ), - ), - newPos(11), - ), - }, - Fragments: []*FragmentNode{ - withFragmentPos( - frag("letter", "[A-Za-z_]"), - newPos(13), - ), - withFragmentPos( - frag("number", "[0-9]"), - newPos(15), - ), - }, - }, - }, - { - caption: "a symbol can have a label", - src: ` -expr - : term@lhs add term@rhs - ; 
-`, - ast: &RootNode{ - Productions: []*ProductionNode{ - withProdPos( - prod("expr", - withAltPos( - alt( - withElemPos( - withLabel( - id("term"), - withLabelPos( - label("lhs"), - newPos(3), - ), - ), - newPos(3), - ), - withElemPos( - id("add"), - newPos(3), - ), - withElemPos( - withLabel( - id("term"), - withLabelPos( - label("rhs"), - newPos(3), - ), - ), - newPos(3), - ), - ), - newPos(3), - ), - ), - newPos(2), - ), - }, - }, - }, - { - caption: "a label must be an identifier, not a string", - src: ` -foo - : bar@'baz' - ; -`, - synErr: synErrNoLabel, - }, - { - caption: "a label must be an identifier, not a pattern", - src: ` -foo - : bar@"baz" - ; -`, - synErr: synErrNoLabel, - }, - { - caption: "the symbol marker @ must be followed by an identifier", - src: ` -foo - : bar@ - ; -`, - synErr: synErrNoLabel, - }, - { - caption: "a symbol cannot have more than or equal to two labels", - src: ` -foo - : bar@baz@bra - ; -`, - synErr: synErrLabelWithNoSymbol, - }, - { - caption: "a label must follow a symbol", - src: ` -foo - : @baz - ; -`, - synErr: synErrLabelWithNoSymbol, - }, - { - caption: "a grammar can contain left and right associativities", - src: ` -#prec ( - #left l1 l2 - #left l3 - #right r1 r2 - #right r3 -); - -s - : id l1 id l2 id l3 id - | id r1 id r2 id r3 id - ; - -whitespaces #skip - : "[\u{0009}\u{0020}]+"; -l1 - : 'l1'; -l2 - : 'l2'; -l3 - : 'l3'; -r1 - : 'r1'; -r2 - : 'r2'; -r3 - : 'r3'; -id - : "[A-Za-z0-9_]+"; -`, - ast: &RootNode{ - Directives: []*DirectiveNode{ - withDirPos( - prec( - withParamPos( - group( - withDirPos( - leftAssoc( - withParamPos(idParam("l1"), newPos(3)), - withParamPos(idParam("l2"), newPos(3)), - ), - newPos(3), - ), - withDirPos( - leftAssoc( - withParamPos(idParam("l3"), newPos(4)), - ), - newPos(4), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r1"), newPos(5)), - withParamPos(idParam("r2"), newPos(5)), - ), - newPos(5), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r3"), newPos(6)), - ), 
- newPos(6), - ), - ), - newPos(2), - ), - ), - newPos(2), - ), - }, - Productions: []*ProductionNode{ - prod("s", - alt(id(`id`), id(`l1`), id(`id`), id(`l2`), id(`id`), id(`l3`), id(`id`)), - alt(id(`id`), id(`r1`), id(`id`), id(`r2`), id(`id`), id(`r3`), id(`id`)), - ), - }, - LexProductions: []*ProductionNode{ - withProdDir( - prod("whitespaces", - alt(pat(`[\u{0009}\u{0020}]+`)), - ), - dir("skip"), - ), - prod("l1", alt(pat(`l1`))), - prod("l2", alt(pat(`l2`))), - prod("l3", alt(pat(`l3`))), - prod("r1", alt(pat(`r1`))), - prod("r2", alt(pat(`r2`))), - prod("r3", alt(pat(`r3`))), - prod("id", alt(pat(`[A-Za-z0-9_]+`))), - }, - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - ast, err := Parse(strings.NewReader(tt.src)) - if tt.synErr != nil { - synErrs, ok := err.(verr.SpecErrors) - if !ok { - t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err) - } - synErr := synErrs[0] - if tt.synErr != synErr.Cause { - t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause) - } - if ast != nil { - t.Fatalf("AST must be nil") - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if ast == nil { - t.Fatalf("AST must be non-nil") - } - testRootNode(t, ast, tt.ast, tt.checkPosition) - } - }) - } -} - -func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { - t.Helper() - if len(root.Productions) != len(expected.Productions) { - t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) - } - if len(root.Directives) != len(expected.Directives) { - t.Fatalf("unexpected length of top-level directives; want: %v, got: %v", len(expected.Directives), len(root.Directives)) - } - for i, dir := range root.Directives { - testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true) - } - for i, prod := range root.Productions { - testProductionNode(t, prod, expected.Productions[i], checkPosition) 
- } - for i, prod := range root.LexProductions { - testProductionNode(t, prod, expected.LexProductions[i], checkPosition) - } - for i, frag := range root.Fragments { - testFragmentNode(t, frag, expected.Fragments[i], checkPosition) - } -} - -func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) { - t.Helper() - if len(expected.Directives) != len(prod.Directives) { - t.Fatalf("unexpected directive count; want: %v directives, got: %v directives", len(expected.Directives), len(prod.Directives)) - } - if len(expected.Directives) > 0 { - testDirectives(t, prod.Directives, expected.Directives, checkPosition) - } - if prod.LHS != expected.LHS { - t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS) - } - if len(prod.RHS) != len(expected.RHS) { - t.Fatalf("unexpected length of an RHS; want: %v, got: %v", len(expected.RHS), len(prod.RHS)) - } - for i, alt := range prod.RHS { - testAlternativeNode(t, alt, expected.RHS[i], checkPosition) - } - if checkPosition { - testPosition(t, prod.Pos, expected.Pos) - } -} - -func testFragmentNode(t *testing.T, frag, expected *FragmentNode, checkPosition bool) { - t.Helper() - if frag.LHS != expected.LHS { - t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, frag.LHS) - } - if frag.RHS != expected.RHS { - t.Fatalf("unexpected RHS; want: %v, got: %v", expected.RHS, frag.RHS) - } - if checkPosition { - testPosition(t, frag.Pos, expected.Pos) - } -} - -func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosition bool) { - t.Helper() - if len(alt.Elements) != len(expected.Elements) { - t.Fatalf("unexpected length of elements; want: %v, got: %v", len(expected.Elements), len(alt.Elements)) - } - for i, elem := range alt.Elements { - testElementNode(t, elem, expected.Elements[i], checkPosition) - } - if len(alt.Directives) != len(expected.Directives) { - t.Fatalf("unexpected alternative directive count; want: %v directive, got: %v directive", 
len(expected.Directives), len(alt.Directives)) - } - if len(alt.Directives) > 0 { - testDirectives(t, alt.Directives, expected.Directives, checkPosition) - } - if checkPosition { - testPosition(t, alt.Pos, expected.Pos) - } -} - -func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bool) { - t.Helper() - if elem.ID != expected.ID { - t.Fatalf("unexpected ID; want: %v, got: %v", expected.ID, elem.ID) - } - if elem.Pattern != expected.Pattern { - t.Fatalf("unexpected pattern; want: %v, got: %v", expected.Pattern, elem.Pattern) - } - if checkPosition { - testPosition(t, elem.Pos, expected.Pos) - } -} - -func testDirectives(t *testing.T, dirs, expected []*DirectiveNode, checkPosition bool) { - t.Helper() - for i, exp := range expected { - dir := dirs[i] - - if exp.Name != dir.Name { - t.Fatalf("unexpected directive name; want: %+v, got: %+v", exp.Name, dir.Name) - } - if len(exp.Parameters) != len(dir.Parameters) { - t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", exp.Parameters, dir.Parameters) - } - for j, expParam := range exp.Parameters { - testParameter(t, dir.Parameters[j], expParam, checkPosition) - } - if checkPosition { - testPosition(t, dir.Pos, exp.Pos) - } - } -} - -func testParameter(t *testing.T, param, expected *ParameterNode, checkPosition bool) { - t.Helper() - if param.ID != expected.ID { - t.Fatalf("unexpected ID parameter; want: %v, got: %v", expected.ID, param.ID) - } - if param.String != expected.String { - t.Fatalf("unexpected string parameter; want: %v, got: %v", expected.ID, param.ID) - } - if param.Expansion != expected.Expansion { - t.Fatalf("unexpected expansion; want: %v, got: %v", expected.Expansion, param.Expansion) - } - if checkPosition { - testPosition(t, param.Pos, expected.Pos) - } -} - -func testPosition(t *testing.T, pos, expected Position) { - t.Helper() - if pos.Row != expected.Row { - t.Fatalf("unexpected position want: %+v, got: %+v", expected, pos) - } -} diff --git 
a/tests/unit/spec/test.go b/tests/unit/spec/test.go new file mode 100644 index 0000000..eddba92 --- /dev/null +++ b/tests/unit/spec/test.go @@ -0,0 +1,411 @@ +package test + +import ( + "fmt" + "reflect" + "strings" + "testing" +) + +func TestTree_Format(t *testing.T) { + expected := `(a + (b + (c)) + (d) + (e))` + tree := NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + NewNonTerminalTree("d"), + NewNonTerminalTree("e"), + ) + actual := string(tree.Format()) + if actual != expected { + t.Fatalf("unexpected format:\n%v", actual) + } +} + +func TestDiffTree(t *testing.T) { + tests := []struct { + t1 *Tree + t2 *Tree + different bool + }{ + { + t1: NewTerminalNode("a", "a"), + t2: NewTerminalNode("a", "a"), + }, + { + t1: NewTerminalNode("a", "a"), + t2: NewTerminalNode("a", "A"), + different: true, + }, + { + t1: NewTerminalNode("a", "a"), + t2: NewTerminalNode("A", "a"), + different: true, + }, + { + t1: NewNonTerminalTree("a"), + t2: NewNonTerminalTree("a"), + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + NewNonTerminalTree("d", + NewNonTerminalTree("d"), + ), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + NewNonTerminalTree("d", + NewNonTerminalTree("d"), + ), + ), + }, + { + t1: NewNonTerminalTree("a"), + t2: NewNonTerminalTree("b"), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + t2: NewNonTerminalTree("a"), + different: true, + }, + { + t1: NewNonTerminalTree("a"), + t2: NewNonTerminalTree("a", + 
NewNonTerminalTree("b"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("c"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b"), + NewNonTerminalTree("c"), + NewNonTerminalTree("d"), + ), + different: true, + }, + { + t1: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("c"), + ), + ), + t2: NewNonTerminalTree("a", + NewNonTerminalTree("b", + NewNonTerminalTree("d"), + ), + ), + different: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + diffs := DiffTree(tt.t1, tt.t2) + if tt.different && len(diffs) == 0 { + t.Fatalf("unexpected result") + } else if !tt.different && len(diffs) > 0 { + t.Fatalf("unexpected result") + } + }) + } +} + +func TestParseTestCase(t *testing.T) { + tests := []struct { + src string + tc *TestCase + parseErr bool + }{ + { + src: `test +--- +foo +--- +(foo) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + { + src: ` +test + +--- + +foo + +--- + +(foo) + +`, + tc: &TestCase{ + Description: "\ntest\n", + Source: []byte("\nfoo\n"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // The length of a part delimiter may be greater than 3. + { + src: ` +test +---- +foo +---- +(foo) +`, + tc: &TestCase{ + Description: "\ntest", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // The description part may be empty. 
+ { + src: `---- +foo +---- +(foo) +`, + tc: &TestCase{ + Description: "", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // The source part may be empty. + { + src: `test +--- +--- +(foo) +`, + tc: &TestCase{ + Description: "test", + Source: []byte{}, + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error, + // but we allow it to simplify the implementation of the parser. + { + src: `test +---- +foo +---- +(foo) +--- +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo"), + Output: NewNonTerminalTree("foo").Fill(), + }, + }, + { + src: ``, + parseErr: true, + }, + { + src: `test +--- +`, + parseErr: true, + }, + { + src: `test +--- +foo +`, + parseErr: true, + }, + { + src: `test +--- +foo +--- +`, + parseErr: true, + }, + { + src: `test +-- +foo +-- +(foo) +`, + parseErr: true, + }, + // A node may have just one string node. + { + src: `test +---- +foo bar +---- +(foo (bar 'bar')) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo bar"), + Output: NewNonTerminalTree("foo", + NewTerminalNode("bar", "bar"), + ).Fill(), + }, + }, + // A node may have just one pattern node. + { + src: `test +---- +foo bar +---- +(foo (bar "bar")) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo bar"), + Output: NewNonTerminalTree("foo", + NewTerminalNode("bar", "bar"), + ).Fill(), + }, + }, + // A node may be the error node. + { + src: `test +---- +foo x +---- +(foo (error)) +`, + tc: &TestCase{ + Description: "test", + Source: []byte("foo x"), + Output: NewNonTerminalTree("foo", + NewTerminalNode("error", ""), + ).Fill(), + }, + }, + // The error node cannot have a string node. + { + src: `test +---- +foo x +---- +(foo (error 'x')) +`, + parseErr: true, + }, + // The error node cannot have a pattern node. 
+ { + src: `test +---- +foo x +---- +(foo (error "x")) +`, + parseErr: true, + }, + // The error node cannot have another node. + { + src: `test +---- +foo x +---- +(foo (error (a))) +`, + parseErr: true, + }, + { + src: `test +--- +foo +--- +? +`, + parseErr: true, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + tc, err := ParseTestCase(strings.NewReader(tt.src)) + if tt.parseErr { + if err == nil { + t.Fatalf("an expected error didn't occur") + } + } else { + if err != nil { + t.Fatal(err) + } + testTestCase(t, tt.tc, tc) + } + }) + } +} + +func testTestCase(t *testing.T, expected, actual *TestCase) { + t.Helper() + + if expected.Description != actual.Description || + !reflect.DeepEqual(expected.Source, actual.Source) || + len(DiffTree(expected.Output, actual.Output)) > 0 { + t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual) + } +} diff --git a/tests/unit/spec/test/parser_test.go b/tests/unit/spec/test/parser_test.go deleted file mode 100644 index eddba92..0000000 --- a/tests/unit/spec/test/parser_test.go +++ /dev/null @@ -1,411 +0,0 @@ -package test - -import ( - "fmt" - "reflect" - "strings" - "testing" -) - -func TestTree_Format(t *testing.T) { - expected := `(a - (b - (c)) - (d) - (e))` - tree := NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - NewNonTerminalTree("d"), - NewNonTerminalTree("e"), - ) - actual := string(tree.Format()) - if actual != expected { - t.Fatalf("unexpected format:\n%v", actual) - } -} - -func TestDiffTree(t *testing.T) { - tests := []struct { - t1 *Tree - t2 *Tree - different bool - }{ - { - t1: NewTerminalNode("a", "a"), - t2: NewTerminalNode("a", "a"), - }, - { - t1: NewTerminalNode("a", "a"), - t2: NewTerminalNode("a", "A"), - different: true, - }, - { - t1: NewTerminalNode("a", "a"), - t2: NewTerminalNode("A", "a"), - different: true, - }, - { - t1: NewNonTerminalTree("a"), - t2: NewNonTerminalTree("a"), - }, - { - t1: 
NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - NewNonTerminalTree("d", - NewNonTerminalTree("d"), - ), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - NewNonTerminalTree("d", - NewNonTerminalTree("d"), - ), - ), - }, - { - t1: NewNonTerminalTree("a"), - t2: NewNonTerminalTree("b"), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - t2: NewNonTerminalTree("a"), - different: true, - }, - { - t1: NewNonTerminalTree("a"), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("c"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b"), - NewNonTerminalTree("c"), - NewNonTerminalTree("d"), - ), - different: true, - }, - { - t1: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("c"), - ), - ), - t2: NewNonTerminalTree("a", - NewNonTerminalTree("b", - NewNonTerminalTree("d"), - ), - ), - different: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - diffs := DiffTree(tt.t1, tt.t2) - if tt.different && len(diffs) == 0 { - 
t.Fatalf("unexpected result") - } else if !tt.different && len(diffs) > 0 { - t.Fatalf("unexpected result") - } - }) - } -} - -/* TestParseTestCase is a table-driven test of ParseTestCase. Each entry supplies the raw input (src) and either the TestCase expected back (tc) or parseErr: true when parsing must fail. */ func TestParseTestCase(t *testing.T) { - tests := []struct { - src string - tc *TestCase - parseErr bool - }{ - { - src: `test ---- -foo ---- -(foo) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - { - src: ` -test - ---- - -foo - ---- - -(foo) - -`, - tc: &TestCase{ - Description: "\ntest\n", - Source: []byte("\nfoo\n"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // The length of a part delimiter may be greater than 3. - { - src: ` -test ----- -foo ----- -(foo) -`, - tc: &TestCase{ - Description: "\ntest", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // The description part may be empty. - { - src: `---- -foo ----- -(foo) -`, - tc: &TestCase{ - Description: "", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // The source part may be empty. - { - src: `test ---- ---- -(foo) -`, - tc: &TestCase{ - Description: "test", - Source: []byte{}, - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - // NOTE: If there is a delimiter at the end of a test case, we really want to make it a syntax error, - // but we allow it to simplify the implementation of the parser. - { - src: `test ----- -foo ----- -(foo) ---- -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo"), - Output: NewNonTerminalTree("foo").Fill(), - }, - }, - { - src: ``, - parseErr: true, - }, - { - src: `test ---- -`, - parseErr: true, - }, - { - src: `test ---- -foo -`, - parseErr: true, - }, - { - src: `test ---- -foo ---- -`, - parseErr: true, - }, - { - src: `test --- -foo --- -(foo) -`, - parseErr: true, - }, - // A node may have just one string node. 
- { - src: `test ----- -foo bar ----- -(foo (bar 'bar')) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo bar"), - Output: NewNonTerminalTree("foo", - NewTerminalNode("bar", "bar"), - ).Fill(), - }, - }, - // A node may have just one pattern node. - { - src: `test ----- -foo bar ----- -(foo (bar "bar")) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo bar"), - Output: NewNonTerminalTree("foo", - NewTerminalNode("bar", "bar"), - ).Fill(), - }, - }, - // A node may be the error node. - { - src: `test ----- -foo x ----- -(foo (error)) -`, - tc: &TestCase{ - Description: "test", - Source: []byte("foo x"), - Output: NewNonTerminalTree("foo", - NewTerminalNode("error", ""), - ).Fill(), - }, - }, - // The error node cannot have a string node. - { - src: `test ----- -foo x ----- -(foo (error 'x')) -`, - parseErr: true, - }, - // The error node cannot have a pattern node. - { - src: `test ----- -foo x ----- -(foo (error "x")) -`, - parseErr: true, - }, - // The error node cannot have another node. - { - src: `test ----- -foo x ----- -(foo (error (a))) -`, - parseErr: true, - }, - { - src: `test ---- -foo ---- -? -`, - parseErr: true, - }, - } - /* Run every entry as a subtest named after its index; a parseErr entry passes only if ParseTestCase returns an error, any other entry must parse without error and is then compared against tt.tc. */ for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - tc, err := ParseTestCase(strings.NewReader(tt.src)) - if tt.parseErr { - if err == nil { - t.Fatalf("an expected error didn't occur") - } - } else { - if err != nil { - t.Fatal(err) - } - testTestCase(t, tt.tc, tc) - } - }) - } -} - -/* testTestCase aborts the test with t.Fatalf when the two cases differ in Description, in Source (compared with reflect.DeepEqual, which does not treat a nil slice and an empty slice as equal), or in Output (any entry returned by DiffTree). It is marked as a helper via t.Helper. */ func testTestCase(t *testing.T, expected, actual *TestCase) { - t.Helper() - - if expected.Description != actual.Description || - !reflect.DeepEqual(expected.Source, actual.Source) || - len(DiffTree(expected.Output, actual.Output)) > 0 { - t.Fatalf("unexpected test case: want: %#v, got: %#v", expected, actual) - } -} -- cgit v1.2.3