Move existing test files to "urubu" subdirectory

author: EuAndreh <eu@euandre.org> 2024-12-11 17:12:50 -0300
committer: EuAndreh <eu@euandre.org> 2024-12-11 17:14:00 -0300
commit: 5e6886fa4ff6f4dd0145ec3e3f150f5123566c7e (patch)
tree: fe8f82a5ce2eb68ce9852535bc9ac06bad5b9598 /tests/unit/driver/parser.go
parent: Do the same single file consolidation on tests (diff)
download: urubu-5e6886fa4ff6f4dd0145ec3e3f150f5123566c7e.tar.gz
urubu-5e6886fa4ff6f4dd0145ec3e3f150f5123566c7e.tar.xz
1 files changed, 0 insertions, 1972 deletions
diff --git a/tests/unit/driver/parser.go b/tests/unit/driver/parser.go
deleted file mode 100644
index 31fec45..0000000
--- a/tests/unit/driver/parser.go
+++ /dev/null
@@ -1,1972 +0,0 @@
-package parser
-
-import (
-	"fmt"
-	"sort"
-	"strings"
-	"testing"
-
-	"urubu/grammar"
-	spec "urubu/spec/grammar"
-	"urubu/spec/grammar/parser"
-)
-
-func TestParserWithConflicts(t *testing.T) {
-	tests := []struct {
-		caption string
-		specSrc string
-		src     string
-		cst     *Node
-	}{
-		{
-			caption: "when a shift/reduce conflict occurred, we prioritize the shift action",
-			specSrc: `
-#name test;
-
-expr
-    : expr assign expr
-	| id
-	;
-
-id: "[A-Za-z0-9_]+";
-assign: '=';
-`,
-			src: `foo=bar=baz`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					termNode("id", "foo"),
-				),
-				termNode("assign", "="),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("id", "bar"),
-					),
-					termNode("assign", "="),
-					nonTermNode("expr",
-						termNode("id", "baz"),
-					),
-				),
-			),
-		},
-		{
-			caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar",
-			specSrc: `
-#name test;
-
-s
-    : a
-	| b
-	;
-a
-    : id
-	;
-b
-    : id
-	;
-
-id: "[A-Za-z0-9_]+";
-`,
-			src: `foo`,
-			cst: nonTermNode("s",
-				nonTermNode("a",
-					termNode("id", "foo"),
-				),
-			),
-		},
-		{
-			caption: "left associativities defined earlier in the grammar have higher precedence",
-			specSrc: `
-#name test;
-
-#prec (
-    #left mul
-    #left add
-);
-
-expr
-    : expr add expr
-    | expr mul expr
-	| id
-    ;
-
-id: "[A-Za-z0-9_]+";
-add: '+';
-mul: '*';
-`,
-			src: `a+b*c*d+e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("id", "a"),
-					),
-					termNode("add", "+"),
-					nonTermNode("expr",
-						nonTermNode("expr",
-							nonTermNode("expr",
-								termNode("id", "b"),
-							),
-							termNode("mul", "*"),
-							nonTermNode("expr",
-								termNode("id", "c"),
-							),
-						),
-						termNode("mul", "*"),
-						nonTermNode("expr",
-							termNode("id", "d"),
-						),
-					),
-				),
-				termNode("add", "+"),
-				nonTermNode("expr",
-					termNode("id", "e"),
-				),
-			),
-		},
-		{
-			caption: "left associativities defined in the same line have the same precedence",
-			specSrc: `
-#name test;
-
-#prec (
-    #left add sub
-);
-
-expr
-    : expr add expr
-    | expr sub expr
-	| id
-    ;
-
-id: "[A-Za-z0-9_]+";
-add: '+';
-sub: '-';
-`,
-			src: `a-b+c+d-e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					nonTermNode("expr",
-						nonTermNode("expr",
-							nonTermNode("expr",
-								termNode("id", "a"),
-							),
-							termNode("sub", "-"),
-							nonTermNode("expr",
-								termNode("id", "b"),
-							),
-						),
-						termNode("add", "+"),
-						nonTermNode("expr",
-							termNode("id", "c"),
-						),
-					),
-					termNode("add", "+"),
-					nonTermNode("expr",
-						termNode("id", "d"),
-					),
-				),
-				termNode("sub", "-"),
-				nonTermNode("expr",
-					termNode("id", "e"),
-				),
-			),
-		},
-		{
-			caption: "right associativities defined earlier in the grammar have higher precedence",
-			specSrc: `
-#name test;
-
-#prec (
-    #right r1
-    #right r2
-);
-
-expr
-    : expr r2 expr
-    | expr r1 expr
-	| id
-    ;
-
-whitespaces #skip
-    : "[\u{0009}\u{0020}]+";
-r1
-    : 'r1';
-r2
-    : 'r2';
-id
-    : "[A-Za-z0-9_]+";
-`,
-			src: `a r2 b r1 c r1 d r2 e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					termNode("id", "a"),
-				),
-				termNode("r2", "r2"),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						nonTermNode("expr",
-							termNode("id", "b"),
-						),
-						termNode("r1", "r1"),
-						nonTermNode("expr",
-							nonTermNode("expr",
-								termNode("id", "c"),
-							),
-							termNode("r1", "r1"),
-							nonTermNode("expr",
-								termNode("id", "d"),
-							),
-						),
-					),
-					termNode("r2", "r2"),
-					nonTermNode("expr",
-						termNode("id", "e"),
-					),
-				),
-			),
-		},
-		{
-			caption: "right associativities defined in the same line have the same precedence",
-			specSrc: `
-#name test;
-
-#prec (
-    #right r1 r2
-);
-
-expr
-    : expr r2 expr
-    | expr r1 expr
-	| id
-    ;
-
-whitespaces #skip
-    : "[\u{0009}\u{0020}]+";
-r1
-    : 'r1';
-r2
-    : 'r2';
-id
-    : "[A-Za-z0-9_]+";
-`,
-			src: `a r2 b r1 c r1 d r2 e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					termNode("id", "a"),
-				),
-				termNode("r2", "r2"),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("id", "b"),
-					),
-					termNode("r1", "r1"),
-					nonTermNode("expr",
-						nonTermNode("expr",
-							termNode("id", "c"),
-						),
-						termNode("r1", "r1"),
-						nonTermNode("expr",
-							nonTermNode("expr",
-								termNode("id", "d"),
-							),
-							termNode("r2", "r2"),
-							nonTermNode("expr",
-								termNode("id", "e"),
-							),
-						),
-					),
-				),
-			),
-		},
-		{
-			caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence",
-			specSrc: `
-#name test;
-
-#prec (
-    #assign a1
-    #assign a2
-);
-
-expr
-    : expr a2 expr
-    | expr a1 expr
-	| id
-    ;
-
-whitespaces #skip
-    : "[\u{0009}\u{0020}]+";
-a1
-    : 'a1';
-a2
-    : 'a2';
-id
-    : "[A-Za-z0-9_]+";
-`,
-			src: `a a2 b a1 c a1 d a2 e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					termNode("id", "a"),
-				),
-				termNode("a2", "a2"),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						nonTermNode("expr",
-							termNode("id", "b"),
-						),
-						termNode("a1", "a1"),
-						nonTermNode("expr",
-							nonTermNode("expr",
-								termNode("id", "c"),
-							),
-							termNode("a1", "a1"),
-							nonTermNode("expr",
-								termNode("id", "d"),
-							),
-						),
-					),
-					termNode("a2", "a2"),
-					nonTermNode("expr",
-						termNode("id", "e"),
-					),
-				),
-			),
-		},
-		{
-			caption: "terminal symbols with an #assign directive defined in the same line have the same precedence",
-			specSrc: `
-#name test;
-
-#prec (
-    #assign a1 a2
-);
-
-expr
-    : expr a2 expr
-    | expr a1 expr
-	| id
-    ;
-
-whitespaces #skip
-    : "[\u{0009}\u{0020}]+";
-a1
-    : 'a1';
-a2
-    : 'a2';
-id
-    : "[A-Za-z0-9_]+";
-`,
-			src: `a a2 b a1 c a1 d a2 e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					termNode("id", "a"),
-				),
-				termNode("a2", "a2"),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("id", "b"),
-					),
-					termNode("a1", "a1"),
-					nonTermNode("expr",
-						nonTermNode("expr",
-							termNode("id", "c"),
-						),
-						termNode("a1", "a1"),
-						nonTermNode("expr",
-							nonTermNode("expr",
-								termNode("id", "d"),
-							),
-							termNode("a2", "a2"),
-							nonTermNode("expr",
-								termNode("id", "e"),
-							),
-						),
-					),
-				),
-			),
-		},
-		{
-			caption: "#left, #right, and #assign can be mixed",
-			specSrc: `
-#name test;
-
-#prec (
-    #left mul div
-    #left add sub
-    #assign else
-    #assign then
-    #right assign
-);
-
-expr
-    : expr add expr
-    | expr sub expr
-    | expr mul expr
-    | expr div expr
-    | expr assign expr
-    | if expr then expr
-    | if expr then expr else expr
-    | id
-    ;
-
-ws #skip: "[\u{0009}\u{0020}]+";
-if: 'if';
-then: 'then';
-else: 'else';
-id: "[A-Za-z0-9_]+";
-add: '+';
-sub: '-';
-mul: '*';
-div: '/';
-assign: '=';
-`,
-			src: `x = y = a + b * c - d / e + if f then if g then h else i`,
-			cst: nonTermNode(
-				"expr",
-				nonTermNode("expr",
-					termNode("id", "x"),
-				),
-				termNode("assign", "="),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("id", "y"),
-					),
-					termNode("assign", "="),
-					nonTermNode("expr",
-						nonTermNode("expr",
-							nonTermNode("expr",
-								nonTermNode("expr",
-									termNode("id", "a"),
-								),
-								termNode("add", "+"),
-								nonTermNode("expr",
-									nonTermNode("expr",
-										termNode("id", "b"),
-									),
-									termNode("mul", "*"),
-									nonTermNode("expr",
-										termNode("id", "c"),
-									),
-								),
-							),
-							termNode("sub", "-"),
-							nonTermNode("expr",
-								nonTermNode("expr",
-									termNode("id", "d"),
-								),
-								termNode("div", "/"),
-								nonTermNode("expr",
-									termNode("id", "e"),
-								),
-							),
-						),
-						termNode("add", "+"),
-						nonTermNode("expr",
-							termNode("if", "if"),
-							nonTermNode("expr",
-								termNode("id", "f"),
-							),
-							termNode("then", "then"),
-							nonTermNode("expr",
-								termNode("if", "if"),
-								nonTermNode("expr",
-									termNode("id", "g"),
-								),
-								termNode("then", "then"),
-								nonTermNode("expr",
-									termNode("id", "h"),
-								),
-								termNode("else", "else"),
-								nonTermNode("expr",
-									termNode("id", "i"),
-								),
-							),
-						),
-					),
-				),
-			),
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.caption, func(t *testing.T) {
-			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			b := grammar.GrammarBuilder{
-				AST: ast,
-			}
-			cg, _, err := b.Build()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			gram := NewGrammar(cg)
-			tb := NewDefaultSyntaxTreeBuilder()
-			p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			err = p.Parse()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			if tt.cst != nil {
-				testTree(t, tb.Tree(), tt.cst)
-			}
-		})
-	}
-}
-
-func TestParserWithLAC(t *testing.T) {
-	specSrc := `
-#name test;
-
-s
-    : t t
-	;
-t
-    : c t
-	| d
-	;
-
-c: 'c';
-d: 'd';
-`
-
-	src := `ccd`
-
-	actLogWithLAC := []string{
-		"shift/c",
-		"shift/c",
-		"shift/d",
-		"miss",
-	}
-
-	actLogWithoutLAC := []string{
-		"shift/c",
-		"shift/c",
-		"shift/d",
-		"reduce/t",
-		"reduce/t",
-		"reduce/t",
-		"miss",
-	}
-
-	ast, err := parser.Parse(strings.NewReader(specSrc))
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	b := grammar.GrammarBuilder{
-		AST: ast,
-	}
-	gram, _, err := b.Build()
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	t.Run("LAC is enabled", func(t *testing.T) {
-		semAct := &testSemAct{
-			gram: gram,
-		}
-
-		toks, err := NewTokenStream(gram, strings.NewReader(src))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		err = p.Parse()
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if len(semAct.actLog) != len(actLogWithLAC) {
-			t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
-		}
-
-		for i, e := range actLogWithLAC {
-			if semAct.actLog[i] != e {
-				t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
-			}
-		}
-	})
-
-	t.Run("LAC is disabled", func(t *testing.T) {
-		semAct := &testSemAct{
-			gram: gram,
-		}
-
-		toks, err := NewTokenStream(gram, strings.NewReader(src))
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		err = p.Parse()
-		if err != nil {
-			t.Fatal(err)
-		}
-
-		if len(semAct.actLog) != len(actLogWithoutLAC) {
-			t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
-		}
-
-		for i, e := range actLogWithoutLAC {
-			if semAct.actLog[i] != e {
-				t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
-			}
-		}
-	})
-}
-
-func termNode(kind string, text string, children ...*Node) *Node {
-	return &Node{
-		Type:     NodeTypeTerminal,
-		KindName: kind,
-		Text:     text,
-		Children: children,
-	}
-}
-
-func errorNode() *Node {
-	return &Node{
-		Type:     NodeTypeError,
-		KindName: "error",
-	}
-}
-
-func nonTermNode(kind string, children ...*Node) *Node {
-	return &Node{
-		Type:     NodeTypeNonTerminal,
-		KindName: kind,
-		Children: children,
-	}
-}
-
-func TestParser_Parse(t *testing.T) {
-	tests := []struct {
-		specSrc string
-		src     string
-		synErr  bool
-		cst     *Node
-		ast     *Node
-	}{
-		{
-			specSrc: `
-#name test;
-
-expr
-    : expr add term
-    | term
-    ;
-term
-    : term mul factor
-    | factor
-    ;
-factor
-    : l_paren expr r_paren
-    | id
-    ;
-
-add
-    : '+';
-mul
-    : '*';
-l_paren
-    : '(';
-r_paren
-    : ')';
-id
-    : "[A-Za-z_][0-9A-Za-z_]*";
-`,
-			src: `(a+(b+c))*d+e`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					nonTermNode("term",
-						nonTermNode("term",
-							nonTermNode("factor",
-								termNode("l_paren", "("),
-								nonTermNode("expr",
-									nonTermNode("expr",
-										nonTermNode("term",
-											nonTermNode("factor",
-												termNode("id", "a"),
-											),
-										),
-									),
-									termNode("add", "+"),
-									nonTermNode("term",
-										nonTermNode("factor",
-											termNode("l_paren", "("),
-											nonTermNode("expr",
-												nonTermNode("expr",
-													nonTermNode("term",
-														nonTermNode("factor",
-															termNode("id", "b"),
-														),
-													),
-												),
-												termNode("add", "+"),
-												nonTermNode("term",
-													nonTermNode("factor",
-														termNode("id", "c"),
-													),
-												),
-											),
-											termNode("r_paren", ")"),
-										),
-									),
-								),
-								termNode("r_paren", ")"),
-							),
-						),
-						termNode("mul", "*"),
-						nonTermNode("factor",
-							termNode("id", "d"),
-						),
-					),
-				),
-				termNode("add", "+"),
-				nonTermNode("term",
-					nonTermNode("factor",
-						termNode("id", "e"),
-					),
-				),
-			),
-		},
-		// Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
-		// not allowed in string literals.
-		{
-			specSrc: `
-#name test;
-
-s
-    : a b c
-    ;
-
-a
-    : '\f{foo}';
-b
-    : '\u{0000}';
-c
-    : '\p{gc=Letter}';
-`,
-			src: `\f{foo}\u{0000}\p{gc=Letter}`,
-			cst: nonTermNode("s",
-				termNode("a", `\f{foo}`),
-				termNode("b", `\u{0000}`),
-				termNode("c", `\p{gc=Letter}`),
-			),
-		},
-		// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo bar
-    ;
-foo
-    :
-    ;
-bar
-    : bar_text
-    |
-    ;
-bar_text: "bar";
-`,
-			src: ``,
-			cst: nonTermNode("s",
-				nonTermNode("foo"),
-				nonTermNode("bar"),
-			),
-		},
-		// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo bar
-    ;
-foo
-    :
-    ;
-bar
-    : bar_text
-    |
-    ;
-
-bar_text
-    : "bar";
-`,
-			src: `bar`,
-			cst: nonTermNode("s",
-				nonTermNode("foo"),
-				nonTermNode("bar",
-					termNode("bar_text", "bar"),
-				),
-			),
-		},
-		// A production can have multiple alternative productions.
-		{
-			specSrc: `
-#name test;
-
-#prec (
-    #assign $uminus
-    #left mul div
-    #left add sub
-);
-
-expr
-    : expr add expr
-    | expr sub expr
-    | expr mul expr
-    | expr div expr
-    | int
-    | sub int #prec $uminus // This 'sub' means the unary minus symbol.
-    ;
-
-int
-    : "0|[1-9][0-9]*";
-add
-    : '+';
-sub
-    : '-';
-mul
-    : '*';
-div
-    : '/';
-`,
-			src: `-1*-2+3-4/5`,
-			ast: nonTermNode("expr",
-				nonTermNode("expr",
-					nonTermNode("expr",
-						nonTermNode("expr",
-							termNode("sub", "-"),
-							termNode("int", "1"),
-						),
-						termNode("mul", "*"),
-						nonTermNode("expr",
-							termNode("sub", "-"),
-							termNode("int", "2"),
-						),
-					),
-					termNode("add", "+"),
-					nonTermNode("expr",
-						termNode("int", "3"),
-					),
-				),
-				termNode("sub", "-"),
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("int", "4"),
-					),
-					termNode("div", "/"),
-					nonTermNode("expr",
-						termNode("int", "5"),
-					),
-				),
-			),
-		},
-		// A lexical production can have multiple production directives.
-		{
-			specSrc: `
-#name test;
-
-s
-    : push_a push_b pop pop
-    ;
-
-push_a #mode default #push a
-    : '->a';
-push_b #mode a #push b
-    : '->b';
-pop #mode a b #pop
-    : '<-';
-`,
-			src: `->a->b<-<-`,
-			ast: nonTermNode("s",
-				termNode("push_a", "->a"),
-				termNode("push_b", "->b"),
-				termNode("pop", "<-"),
-				termNode("pop", "<-"),
-			),
-		},
-		{
-			specSrc: `
-#name test;
-
-mode_tran_seq
-    : mode_tran_seq mode_tran
-    | mode_tran
-    ;
-mode_tran
-    : push_m1
-    | push_m2
-    | pop_m1
-    | pop_m2
-    ;
-
-push_m1 #push m1
-    : "->";
-push_m2 #mode m1 #push m2
-    : "-->";
-pop_m1 #mode m1 #pop
-    : "<-";
-pop_m2 #mode m2 #pop
-    : "<--";
-whitespace #mode default m1 m2 #skip
-    : "\u{0020}+";
-`,
-			src: ` -> --> <-- <- `,
-		},
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo bar
-    ;
-
-foo
-    : "foo";
-bar #mode default
-    : "bar";
-`,
-			src: `foobar`,
-		},
-		// When #push and #pop are applied to the same symbol, #pop will run first, then #push.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo bar baz
-    ;
-
-foo #push m1
-    : 'foo';
-bar #mode m1 #pop #push m2
-    : 'bar';
-baz #mode m2
-    : 'baz';
-`,
-			src: `foobarbaz`,
-			ast: nonTermNode("s",
-				termNode("foo", "foo"),
-				termNode("bar", "bar"),
-				termNode("baz", "baz"),
-			),
-		},
-		// When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first
-		// in a definition. That is, the order in which #push and #pop appear in grammar has nothing to do with the order in which
-		// they are executed.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo bar baz
-    ;
-
-foo #push m1
-    : 'foo';
-bar #mode m1 #push m2 #pop
-    : 'bar';
-baz #mode m2
-    : 'baz';
-`,
-			src: `foobarbaz`,
-			ast: nonTermNode("s",
-				termNode("foo", "foo"),
-				termNode("bar", "bar"),
-				termNode("baz", "baz"),
-			),
-		},
-		// The parser can skips specified tokens.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo bar
-    ;
-
-foo
-    : "foo";
-bar
-    : "bar";
-white_space #skip
-    : "[\u{0009}\u{0020}]+";
-`,
-			src: `foo bar`,
-		},
-		// A grammar can contain fragments.
-		{
-			specSrc: `
-#name test;
-
-s
-    : tagline
-    ;
-tagline
-    : "\f{words} IS OUT THERE.";
-fragment words
-    : "[A-Za-z\u{0020}]+";
-`,
-			src: `THE TRUTH IS OUT THERE.`,
-		},
-		// A grammar can contain ast actions.
-		{
-			specSrc: `
-#name test;
-
-list
-    : l_bracket elems r_bracket #ast elems...
-    ;
-elems
-    : elems comma id #ast elems... id
-    | id
-    ;
-
-whitespace #skip
-    : "\u{0020}+";
-l_bracket
-    : '[';
-r_bracket
-    : ']';
-comma
-    : ',';
-id
-    : "[A-Za-z]+";
-`,
-			src: `[Byers, Frohike, Langly]`,
-			cst: nonTermNode("list",
-				termNode("x_1", "["),
-				nonTermNode("elems",
-					nonTermNode("elems",
-						nonTermNode("elems",
-							termNode("id", "Byers"),
-						),
-						termNode("x_3", ","),
-						termNode("id", "Frohike"),
-					),
-					termNode("x_3", ","),
-					termNode("id", "Langly"),
-				),
-				termNode("x_2", "]"),
-			),
-			ast: nonTermNode("list",
-				termNode("id", "Byers"),
-				termNode("id", "Frohike"),
-				termNode("id", "Langly"),
-			),
-		},
-		// The '...' operator can expand child nodes.
-		{
-			specSrc: `
-#name test;
-
-s
-    : a #ast a...
-    ;
-a
-    : a comma foo #ast a... foo
-    | foo
-    ;
-
-comma
-    : ',';
-foo
-    : 'foo';
-`,
-			src: `foo,foo,foo`,
-			ast: nonTermNode("s",
-				termNode("foo", "foo"),
-				termNode("foo", "foo"),
-				termNode("foo", "foo"),
-			),
-		},
-		// The '...' operator also can applied to an element having no children.
-		{
-			specSrc: `
-#name test;
-
-s
-    : a semi_colon #ast a...
-    ;
-a
-    :
-    ;
-
-semi_colon
-    : ';';
-`,
-			src: `;`,
-			ast: nonTermNode("s"),
-		},
-		// A label can be a parameter of #ast directive.
-		{
-			specSrc: `
-#name test;
-
-#prec (
-    #left add sub
-);
-
-expr
-    : expr@lhs add expr@rhs #ast add lhs rhs
-    | expr@lhs sub expr@rhs #ast sub lhs rhs
-    | num
-    ;
-
-add
-    : '+';
-sub
-    : '-';
-num
-    : "0|[1-9][0-9]*";
-`,
-			src: `1+2-3`,
-			ast: nonTermNode("expr",
-				termNode("sub", "-"),
-				nonTermNode("expr",
-					termNode("add", "+"),
-					nonTermNode("expr",
-						termNode("num", "1"),
-					),
-					nonTermNode("expr",
-						termNode("num", "2"),
-					),
-				),
-				nonTermNode("expr",
-					termNode("num", "3"),
-				),
-			),
-		},
-		// An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo@x semi_colon #ast foo
-    ;
-
-semi_colon
-    : ';';
-foo
-    : 'foo';
-`,
-			src: `foo;`,
-			ast: nonTermNode("s",
-				termNode("foo", "foo"),
-			),
-		},
-		// A production has the same precedence and associativity as the right-most terminal symbol.
-		{
-			specSrc: `
-#name test;
-
-#prec (
-    #left add
-);
-
-expr
-    : expr add expr // This alternative has the same precedence and associativiry as 'add'.
-    | int
-    ;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-int
-    : "0|[1-9][0-9]*";
-add
-    : '+';
-`,
-			// This source is recognized as the following structure because the production `expr → expr add expr` has the same
-			// precedence and associativity as the symbol 'add'.
-			//
-			// ((1+2)+3)
-			//
-			// If the symbol doesn't have the precedence and left associativity, the production also doesn't have the precedence
-			// and associativity and this source will be recognized as the following structure.
-			//
-			// (1+(2+3))
-			src: `1+2+3`,
-			ast: nonTermNode("expr",
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("int", "1"),
-					),
-					termNode("add", "+"),
-					nonTermNode("expr",
-						termNode("int", "2"),
-					),
-				),
-				termNode("add", "+"),
-				nonTermNode("expr",
-					termNode("int", "3"),
-				),
-			),
-		},
-		// The 'prec' directive can set precedence of a production.
-		{
-			specSrc: `
-#name test;
-
-#prec (
-    #assign $uminus
-    #left mul div
-    #left add sub
-);
-
-expr
-    : expr add expr
-    | expr sub expr
-    | expr mul expr
-    | expr div expr
-    | int
-    | sub int #prec $uminus // This 'sub' means a unary minus symbol.
-    ;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-int
-    : "0|[1-9][0-9]*";
-add
-    : '+';
-sub
-    : '-';
-mul
-    : '*';
-div
-    : '/';
-`,
-			// This source is recognized as the following structure because the production `expr → sub expr`
-			// has the `#prec mul` directive and has the same precedence of the symbol `mul`.
-			//
-			// (((-1) * 20) / 5)
-			//
-			// If the production doesn't have the `#prec` directive, this source will be recognized as
-			// the following structure.
-			//
-			// (- ((1 * 20) / 5))
-			src: `-1*20/5`,
-			cst: nonTermNode("expr",
-				nonTermNode("expr",
-					nonTermNode("expr",
-						termNode("sub", "-"),
-						termNode("int", "1"),
-					),
-					termNode("mul", "*"),
-					nonTermNode("expr",
-						termNode("int", "20"),
-					),
-				),
-				termNode("div", "/"),
-				nonTermNode("expr",
-					termNode("int", "5"),
-				),
-			),
-		},
-		// The grammar can contain the 'error' symbol.
-		{
-			specSrc: `
-#name test;
-
-s
-    : id id id semi_colon
-    | error semi_colon
-    ;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semi_colon
-    : ';';
-id
-    : "[A-Za-z_]+";
-`,
-			src: `foo bar baz ;`,
-		},
-		// The 'error' symbol can appear in an #ast directive.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo semi_colon
-    | error semi_colon #ast error
-    ;
-
-semi_colon
-    : ';';
-foo
-    : 'foo';
-`,
-			src:    `bar;`,
-			synErr: true,
-			ast: nonTermNode("s",
-				errorNode(),
-			),
-		},
-		// The 'error' symbol can have a label, and an #ast can reference it.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo semi_colon
-    | error@e semi_colon #ast e
-    ;
-
-semi_colon
-    : ';';
-foo
-    : 'foo';
-`,
-			src:    `bar;`,
-			synErr: true,
-			ast: nonTermNode("s",
-				errorNode(),
-			),
-		},
-		// The grammar can contain the 'recover' directive.
-		{
-			specSrc: `
-#name test;
-
-seq
-    : seq elem
-    | elem
-    ;
-elem
-    : id id id semi_colon
-    | error semi_colon #recover
-    ;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semi_colon
-    : ';';
-id
-    : "[A-Za-z_]+";
-`,
-			src: `a b c ; d e f ;`,
-		},
-		// The same label can be used between different alternatives.
-		{
-			specSrc: `
-#name test;
-
-s
-    : foo@x bar
-    | foo@x
-    ;
-
-foo: 'foo';
-bar: 'bar';
-`,
-			src: `foo`,
-		},
-	}
-
-	for i, tt := range tests {
-		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
-			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			b := grammar.GrammarBuilder{
-				AST: ast,
-			}
-			cg, _, err := b.Build()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			gram := NewGrammar(cg)
-			tb := NewDefaultSyntaxTreeBuilder()
-			var opt []ParserOption
-			switch {
-			case tt.ast != nil:
-				opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
-			case tt.cst != nil:
-				opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
-			}
-			p, err := NewParser(toks, gram, opt...)
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			err = p.Parse()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			if !tt.synErr && len(p.SyntaxErrors()) > 0 {
-				for _, synErr := range p.SyntaxErrors() {
-					t.Fatalf("unexpected syntax errors occurred: %v", synErr)
-				}
-			}
-
-			switch {
-			case tt.ast != nil:
-				testTree(t, tb.Tree(), tt.ast)
-			case tt.cst != nil:
-				testTree(t, tb.Tree(), tt.cst)
-			}
-		})
-	}
-}
-
-func testTree(t *testing.T, node, expected *Node) {
-	t.Helper()
-
-	if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
-		t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
-	}
-	if len(node.Children) != len(expected.Children) {
-		t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
-	}
-	for i, c := range node.Children {
-		testTree(t, c, expected.Children[i])
-	}
-}
-
-type testSemAct struct {
-	gram   *spec.CompiledGrammar
-	actLog []string
-}
-
-func (a *testSemAct) Shift(tok VToken, recovered bool) {
-	t := a.gram.Syntactic.Terminals[tok.TerminalID()]
-	if recovered {
-		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
-	} else {
-		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
-	}
-}
-
-func (a *testSemAct) Reduce(prodNum int, recovered bool) {
-	lhsSym := a.gram.Syntactic.LHSSymbols[prodNum]
-	lhsText := a.gram.Syntactic.NonTerminals[lhsSym]
-	if recovered {
-		a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText))
-	} else {
-		a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText))
-	}
-}
-
-func (a *testSemAct) Accept() {
-	a.actLog = append(a.actLog, "accept")
-}
-
-func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
-	a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
-}
-
-func (a *testSemAct) MissError(cause VToken) {
-	a.actLog = append(a.actLog, "miss")
-}
-
-func TestParserWithSemanticAction(t *testing.T) {
-	specSrcWithErrorProd := `
-#name test;
-
-seq
-    : seq elem semicolon
-	| elem semicolon
-    | error star star semicolon
-	| error semicolon #recover
-	;
-elem
-    : char char char
-	;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semicolon
-    : ';';
-star
-    : '*';
-char
-    : "[a-z]";
-`
-
-	specSrcWithoutErrorProd := `
-#name test;
-
-seq
-    : seq elem semicolon
-	| elem semicolon
-	;
-elem
-    : char char char
-	;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semicolon
-    : ';';
-char
-    : "[a-z]";
-`
-
-	tests := []struct {
-		caption string
-		specSrc string
-		src     string
-		actLog  []string
-	}{
-		{
-			caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.",
-			specSrc: specSrcWithErrorProd,
-			src:     `a b c; d e f;`,
-			actLog: []string{
-				"shift/char",
-				"shift/char",
-				"shift/char",
-				"reduce/elem",
-				"shift/semicolon",
-				"reduce/seq",
-
-				"shift/char",
-				"shift/char",
-				"shift/char",
-				"reduce/elem",
-				"shift/semicolon",
-				"reduce/seq",
-
-				"accept",
-			},
-		},
-		{
-			caption: "when a grammar has `error` symbol, the driver calls `TrapAndShiftError`.",
-			specSrc: specSrcWithErrorProd,
-			src:     `a; b !; c d !; e ! * *; h i j;`,
-			actLog: []string{
-				"shift/char",
-				"trap/1/shift/error",
-				"shift/semicolon",
-				"reduce/seq/recovered",
-
-				"shift/char",
-				"trap/2/shift/error",
-				"shift/semicolon",
-				"reduce/seq/recovered",
-
-				"shift/char",
-				"shift/char",
-				"trap/3/shift/error",
-				"shift/semicolon",
-				"reduce/seq/recovered",
-
-				"shift/char",
-				"trap/2/shift/error",
-				"shift/star",
-				"shift/star",
-				// When the driver shifts three times, it recovers from an error.
-				"shift/semicolon/recovered",
-				"reduce/seq",
-
-				"shift/char",
-				"shift/char",
-				"shift/char",
-				"reduce/elem",
-				"shift/semicolon",
-				"reduce/seq",
-
-				// Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted
-				// according to the error production.
-				"accept",
-			},
-		},
-		{
-			caption: "when the input doesn't meet the error production, the driver calls `MissError`.",
-			specSrc: specSrcWithErrorProd,
-			src:     `a !`,
-			actLog: []string{
-				"shift/char",
-				"trap/1/shift/error",
-
-				"miss",
-			},
-		},
-		{
-			caption: "when a syntax error isn't trapped, the driver calls `MissError`.",
-			specSrc: specSrcWithoutErrorProd,
-			src:     `a !`,
-			actLog: []string{
-				"shift/char",
-
-				"miss",
-			},
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.caption, func(t *testing.T) {
-			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			b := grammar.GrammarBuilder{
-				AST: ast,
-			}
-			gram, _, err := b.Build()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			semAct := &testSemAct{
-				gram: gram,
-			}
-			p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			err = p.Parse()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			if len(semAct.actLog) != len(tt.actLog) {
-				t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
-			}
-
-			for i, e := range tt.actLog {
-				if semAct.actLog[i] != e {
-					t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
-				}
-			}
-		})
-	}
-}
-
-func TestParserWithSyntaxErrors(t *testing.T) {
-	tests := []struct {
-		caption     string
-		specSrc     string
-		src         string
-		synErrCount int
-	}{
-		{
-			caption: "the parser can report a syntax error",
-			specSrc: `
-#name test;
-
-s
-    : foo
-    ;
-
-foo
-    : 'foo';
-`,
-			src:         `bar`,
-			synErrCount: 1,
-		},
-		{
-			caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state",
-			specSrc: `
-#name test;
-
-seq
-    : seq elem semi_colon
-	| elem semi_colon
-	| error semi_colon #recover
-	;
-elem
-    : a b c
-	;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semi_colon
-    : ';';
-a
-    : 'a';
-b
-    : 'b';
-c
-    : 'c';
-`,
-			src:         `!; a!; ab!;`,
-			synErrCount: 3,
-		},
-		{
-			caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears",
-			specSrc: `
-#name test;
-
-seq
-    : seq elem semi_colon
-	| elem semi_colon
-	| error semi_colon #recover
-	;
-elem
-    : a b c
-	;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semi_colon
-    : ';';
-a
-    : 'a';
-b
-    : 'b';
-c
-    : 'c';
-`,
-			// After the parser trasits to the error state reading the first invalid symbol ('!'),
-			// the second and third invalid symbols ('!') are ignored.
-			src:         `! ! !; a!; ab!;`,
-			synErrCount: 3,
-		},
-		{
-			caption: "when the parser performs shift three times, the parser recovers from the error state",
-			specSrc: `
-#name test;
-
-seq
-    : seq elem semi_colon
-	| elem semi_colon
-	| error star star semi_colon
-	;
-elem
-    : a b c
-	;
-
-ws #skip
-    : "[\u{0009}\u{0020}]+";
-semi_colon
-    : ';';
-star
-    : '*';
-a
-    : 'a';
-b
-    : 'b';
-c
-    : 'c';
-`,
-			src:         `!**; a!**; ab!**; abc!`,
-			synErrCount: 4,
-		},
-	}
-	for i, tt := range tests {
-		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
-			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			b := grammar.GrammarBuilder{
-				AST: ast,
-			}
-			gram, _, err := b.Build()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			p, err := NewParser(toks, NewGrammar(gram))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			err = p.Parse()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			synErrs := p.SyntaxErrors()
-			if len(synErrs) != tt.synErrCount {
-				t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
-			}
-		})
-	}
-}
-
-func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) {
-	tests := []struct {
-		caption  string
-		specSrc  string
-		src      string
-		cause    string
-		expected []string
-	}{
-		{
-			caption: "the parser reports an expected lookahead symbol",
-			specSrc: `
-#name test;
-
-s
-    : foo
-    ;
-
-foo
-    : 'foo';
-`,
-			src:   `bar`,
-			cause: `bar`,
-			expected: []string{
-				"foo",
-			},
-		},
-		{
-			caption: "the parser reports expected lookahead symbols",
-			specSrc: `
-#name test;
-
-s
-    : foo
-    | bar
-    ;
-
-foo
-    : 'foo';
-bar
-    : 'bar';
-`,
-			src:   `baz`,
-			cause: `baz`,
-			expected: []string{
-				"foo",
-				"bar",
-			},
-		},
-		{
-			caption: "the parser may report the EOF as an expected lookahead symbol",
-			specSrc: `
-#name test;
-
-s
-    : foo
-    ;
-
-foo
-    : 'foo';
-`,
-			src:   `foobar`,
-			cause: `bar`,
-			expected: []string{
-				"<eof>",
-			},
-		},
-		{
-			caption: "the parser may report the EOF and others as expected lookahead symbols",
-			specSrc: `
-#name test;
-
-s
-    : foo
-    |
-    ;
-
-foo
-    : 'foo';
-`,
-			src:   `bar`,
-			cause: `bar`,
-			expected: []string{
-				"foo",
-				"<eof>",
-			},
-		},
-	}
-	for i, tt := range tests {
-		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
-			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			b := grammar.GrammarBuilder{
-				AST: ast,
-			}
-			gram, _, err := b.Build()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			p, err := NewParser(toks, NewGrammar(gram))
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			err = p.Parse()
-			if err != nil {
-				t.Fatal(err)
-			}
-
-			synErrs := p.SyntaxErrors()
-			if synErrs == nil {
-				t.Fatalf("expected one syntax error, but it didn't occur")
-			}
-			if len(synErrs) != 1 {
-				t.Fatalf("too many syntax errors: %v errors", len(synErrs))
-			}
-			synErr := synErrs[0]
-			if string(synErr.Token.Lexeme()) != tt.cause {
-				t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme()))
-			}
-			if len(synErr.ExpectedTerminals) != len(tt.expected) {
-				t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals)
-			}
-			sort.Slice(tt.expected, func(i, j int) bool {
-				return tt.expected[i] < tt.expected[j]
-			})
-			sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool {
-				return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j]
-			})
-			for i, e := range tt.expected {
-				if synErr.ExpectedTerminals[i] != e {
-					t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i])
-				}
-			}
-		})
-	}
-}
author	EuAndreh <eu@euandre.org>	2024-12-11 17:12:50 -0300
committer	EuAndreh <eu@euandre.org>	2024-12-11 17:14:00 -0300
commit	5e6886fa4ff6f4dd0145ec3e3f150f5123566c7e (patch)
tree	fe8f82a5ce2eb68ce9852535bc9ac06bad5b9598 /tests/unit/driver/parser.go
parent	Do the same single file consolidation on tests (diff)
download	urubu-5e6886fa4ff6f4dd0145ec3e3f150f5123566c7e.tar.gz urubu-5e6886fa4ff6f4dd0145ec3e3f150f5123566c7e.tar.xz