package parser

import (
	"fmt"
	"sort"
	"strings"
	"testing"

	"urubu/grammar"
	spec "urubu/spec/grammar"
	"urubu/spec/grammar/parser"
)

func TestParserWithConflicts(t *testing.T) {
	tests := []struct {
		caption string
		specSrc string
		src     string
		cst     *Node
	}{
		{
			caption: "when a shift/reduce conflict occurred, we prioritize the shift action",
			specSrc: `
#name test;

expr
    : expr assign expr
	| id
	;

id: "[A-Za-z0-9_]+";
assign: '=';
`,
			src: `foo=bar=baz`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					termNode("id", "foo"),
				),
				termNode("assign", "="),
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("id", "bar"),
					),
					termNode("assign", "="),
					nonTermNode("expr",
						termNode("id", "baz"),
					),
				),
			),
		},
		{
			caption: "when a reduce/reduce conflict occurred, we prioritize the production defined earlier in the grammar",
			specSrc: `
#name test;

s
    : a
	| b
	;
a
    : id
	;
b
    : id
	;

id: "[A-Za-z0-9_]+";
`,
			src: `foo`,
			cst: nonTermNode("s",
				nonTermNode("a",
					termNode("id", "foo"),
				),
			),
		},
		{
			caption: "left associativities defined earlier in the grammar have higher precedence",
			specSrc: `
#name test;

#prec (
    #left mul
    #left add
);

expr
    : expr add expr
    | expr mul expr
	| id
    ;

id: "[A-Za-z0-9_]+";
add: '+';
mul: '*';
`,
			src: `a+b*c*d+e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("id", "a"),
					),
					termNode("add", "+"),
					nonTermNode("expr",
						nonTermNode("expr",
							nonTermNode("expr",
								termNode("id", "b"),
							),
							termNode("mul", "*"),
							nonTermNode("expr",
								termNode("id", "c"),
							),
						),
						termNode("mul", "*"),
						nonTermNode("expr",
							termNode("id", "d"),
						),
					),
				),
				termNode("add", "+"),
				nonTermNode("expr",
					termNode("id", "e"),
				),
			),
		},
		{
			caption: "left associativities defined in the same line have the same precedence",
			specSrc: `
#name test;

#prec (
    #left add sub
);

expr
    : expr add expr
    | expr sub expr
	| id
    ;

id: "[A-Za-z0-9_]+";
add: '+';
sub: '-';
`,
			src: `a-b+c+d-e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						nonTermNode("expr",
							nonTermNode("expr",
								termNode("id", "a"),
							),
							termNode("sub", "-"),
							nonTermNode("expr",
								termNode("id", "b"),
							),
						),
						termNode("add", "+"),
						nonTermNode("expr",
							termNode("id", "c"),
						),
					),
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("id", "d"),
					),
				),
				termNode("sub", "-"),
				nonTermNode("expr",
					termNode("id", "e"),
				),
			),
		},
		{
			caption: "right associativities defined earlier in the grammar have higher precedence",
			specSrc: `
#name test;

#prec (
    #right r1
    #right r2
);

expr
    : expr r2 expr
    | expr r1 expr
	| id
    ;

whitespaces #skip
    : "[\u{0009}\u{0020}]+";
r1
    : 'r1';
r2
    : 'r2';
id
    : "[A-Za-z0-9_]+";
`,
			src: `a r2 b r1 c r1 d r2 e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					termNode("id", "a"),
				),
				termNode("r2", "r2"),
				nonTermNode("expr",
					nonTermNode("expr",
						nonTermNode("expr",
							termNode("id", "b"),
						),
						termNode("r1", "r1"),
						nonTermNode("expr",
							nonTermNode("expr",
								termNode("id", "c"),
							),
							termNode("r1", "r1"),
							nonTermNode("expr",
								termNode("id", "d"),
							),
						),
					),
					termNode("r2", "r2"),
					nonTermNode("expr",
						termNode("id", "e"),
					),
				),
			),
		},
		{
			caption: "right associativities defined in the same line have the same precedence",
			specSrc: `
#name test;

#prec (
    #right r1 r2
);

expr
    : expr r2 expr
    | expr r1 expr
	| id
    ;

whitespaces #skip
    : "[\u{0009}\u{0020}]+";
r1
    : 'r1';
r2
    : 'r2';
id
    : "[A-Za-z0-9_]+";
`,
			src: `a r2 b r1 c r1 d r2 e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					termNode("id", "a"),
				),
				termNode("r2", "r2"),
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("id", "b"),
					),
					termNode("r1", "r1"),
					nonTermNode("expr",
						nonTermNode("expr",
							termNode("id", "c"),
						),
						termNode("r1", "r1"),
						nonTermNode("expr",
							nonTermNode("expr",
								termNode("id", "d"),
							),
							termNode("r2", "r2"),
							nonTermNode("expr",
								termNode("id", "e"),
							),
						),
					),
				),
			),
		},
		{
			caption: "terminal symbols with an #assign directive defined earlier in the grammar have higher precedence",
			specSrc: `
#name test;

#prec (
    #assign a1
    #assign a2
);

expr
    : expr a2 expr
    | expr a1 expr
	| id
    ;

whitespaces #skip
    : "[\u{0009}\u{0020}]+";
a1
    : 'a1';
a2
    : 'a2';
id
    : "[A-Za-z0-9_]+";
`,
			src: `a a2 b a1 c a1 d a2 e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					termNode("id", "a"),
				),
				termNode("a2", "a2"),
				nonTermNode("expr",
					nonTermNode("expr",
						nonTermNode("expr",
							termNode("id", "b"),
						),
						termNode("a1", "a1"),
						nonTermNode("expr",
							nonTermNode("expr",
								termNode("id", "c"),
							),
							termNode("a1", "a1"),
							nonTermNode("expr",
								termNode("id", "d"),
							),
						),
					),
					termNode("a2", "a2"),
					nonTermNode("expr",
						termNode("id", "e"),
					),
				),
			),
		},
		{
			caption: "terminal symbols with an #assign directive defined in the same line have the same precedence",
			specSrc: `
#name test;

#prec (
    #assign a1 a2
);

expr
    : expr a2 expr
    | expr a1 expr
	| id
    ;

whitespaces #skip
    : "[\u{0009}\u{0020}]+";
a1
    : 'a1';
a2
    : 'a2';
id
    : "[A-Za-z0-9_]+";
`,
			src: `a a2 b a1 c a1 d a2 e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					termNode("id", "a"),
				),
				termNode("a2", "a2"),
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("id", "b"),
					),
					termNode("a1", "a1"),
					nonTermNode("expr",
						nonTermNode("expr",
							termNode("id", "c"),
						),
						termNode("a1", "a1"),
						nonTermNode("expr",
							nonTermNode("expr",
								termNode("id", "d"),
							),
							termNode("a2", "a2"),
							nonTermNode("expr",
								termNode("id", "e"),
							),
						),
					),
				),
			),
		},
		{
			caption: "#left, #right, and #assign can be mixed",
			specSrc: `
#name test;

#prec (
    #left mul div
    #left add sub
    #assign else
    #assign then
    #right assign
);

expr
    : expr add expr
    | expr sub expr
    | expr mul expr
    | expr div expr
    | expr assign expr
    | if expr then expr
    | if expr then expr else expr
    | id
    ;

ws #skip: "[\u{0009}\u{0020}]+";
if: 'if';
then: 'then';
else: 'else';
id: "[A-Za-z0-9_]+";
add: '+';
sub: '-';
mul: '*';
div: '/';
assign: '=';
`,
			src: `x = y = a + b * c - d / e + if f then if g then h else i`,
			cst: nonTermNode(
				"expr",
				nonTermNode("expr",
					termNode("id", "x"),
				),
				termNode("assign", "="),
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("id", "y"),
					),
					termNode("assign", "="),
					nonTermNode("expr",
						nonTermNode("expr",
							nonTermNode("expr",
								nonTermNode("expr",
									termNode("id", "a"),
								),
								termNode("add", "+"),
								nonTermNode("expr",
									nonTermNode("expr",
										termNode("id", "b"),
									),
									termNode("mul", "*"),
									nonTermNode("expr",
										termNode("id", "c"),
									),
								),
							),
							termNode("sub", "-"),
							nonTermNode("expr",
								nonTermNode("expr",
									termNode("id", "d"),
								),
								termNode("div", "/"),
								nonTermNode("expr",
									termNode("id", "e"),
								),
							),
						),
						termNode("add", "+"),
						nonTermNode("expr",
							termNode("if", "if"),
							nonTermNode("expr",
								termNode("id", "f"),
							),
							termNode("then", "then"),
							nonTermNode("expr",
								termNode("if", "if"),
								nonTermNode("expr",
									termNode("id", "g"),
								),
								termNode("then", "then"),
								nonTermNode("expr",
									termNode("id", "h"),
								),
								termNode("else", "else"),
								nonTermNode("expr",
									termNode("id", "i"),
								),
							),
						),
					),
				),
			),
		},
	}

	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
			if err != nil {
				t.Fatal(err)
			}

			b := grammar.GrammarBuilder{
				AST: ast,
			}
			cg, _, err := b.Build()
			if err != nil {
				t.Fatal(err)
			}

			toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}

			gram := NewGrammar(cg)
			tb := NewDefaultSyntaxTreeBuilder()
			p, err := NewParser(toks, gram, SemanticAction(NewCSTActionSet(gram, tb)))
			if err != nil {
				t.Fatal(err)
			}

			err = p.Parse()
			if err != nil {
				t.Fatal(err)
			}

			if tt.cst != nil {
				testTree(t, tb.Tree(), tt.cst)
			}
		})
	}
}

func TestParserWithLAC(t *testing.T) {
	specSrc := `
#name test;

s
    : t t
	;
t
    : c t
	| d
	;

c: 'c';
d: 'd';
`

	src := `ccd`

	actLogWithLAC := []string{
		"shift/c",
		"shift/c",
		"shift/d",
		"miss",
	}

	actLogWithoutLAC := []string{
		"shift/c",
		"shift/c",
		"shift/d",
		"reduce/t",
		"reduce/t",
		"reduce/t",
		"miss",
	}

	ast, err := parser.Parse(strings.NewReader(specSrc))
	if err != nil {
		t.Fatal(err)
	}

	b := grammar.GrammarBuilder{
		AST: ast,
	}
	gram, _, err := b.Build()
	if err != nil {
		t.Fatal(err)
	}

	t.Run("LAC is enabled", func(t *testing.T) {
		semAct := &testSemAct{
			gram: gram,
		}

		toks, err := NewTokenStream(gram, strings.NewReader(src))
		if err != nil {
			t.Fatal(err)
		}

		p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
		if err != nil {
			t.Fatal(err)
		}

		err = p.Parse()
		if err != nil {
			t.Fatal(err)
		}

		if len(semAct.actLog) != len(actLogWithLAC) {
			t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
		}

		for i, e := range actLogWithLAC {
			if semAct.actLog[i] != e {
				t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithLAC, semAct.actLog)
			}
		}
	})

	t.Run("LAC is disabled", func(t *testing.T) {
		semAct := &testSemAct{
			gram: gram,
		}

		toks, err := NewTokenStream(gram, strings.NewReader(src))
		if err != nil {
			t.Fatal(err)
		}

		p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct), DisableLAC())
		if err != nil {
			t.Fatal(err)
		}

		err = p.Parse()
		if err != nil {
			t.Fatal(err)
		}

		if len(semAct.actLog) != len(actLogWithoutLAC) {
			t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
		}

		for i, e := range actLogWithoutLAC {
			if semAct.actLog[i] != e {
				t.Fatalf("unexpected action log; want: %+v, got: %+v", actLogWithoutLAC, semAct.actLog)
			}
		}
	})
}

func termNode(kind string, text string, children ...*Node) *Node {
	return &Node{
		Type:     NodeTypeTerminal,
		KindName: kind,
		Text:     text,
		Children: children,
	}
}

func errorNode() *Node {
	return &Node{
		Type:     NodeTypeError,
		KindName: "error",
	}
}

func nonTermNode(kind string, children ...*Node) *Node {
	return &Node{
		Type:     NodeTypeNonTerminal,
		KindName: kind,
		Children: children,
	}
}

func TestParser_Parse(t *testing.T) {
	tests := []struct {
		specSrc string
		src     string
		synErr  bool
		cst     *Node
		ast     *Node
	}{
		{
			specSrc: `
#name test;

expr
    : expr add term
    | term
    ;
term
    : term mul factor
    | factor
    ;
factor
    : l_paren expr r_paren
    | id
    ;

add
    : '+';
mul
    : '*';
l_paren
    : '(';
r_paren
    : ')';
id
    : "[A-Za-z_][0-9A-Za-z_]*";
`,
			src: `(a+(b+c))*d+e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("term",
						nonTermNode("term",
							nonTermNode("factor",
								termNode("l_paren", "("),
								nonTermNode("expr",
									nonTermNode("expr",
										nonTermNode("term",
											nonTermNode("factor",
												termNode("id", "a"),
											),
										),
									),
									termNode("add", "+"),
									nonTermNode("term",
										nonTermNode("factor",
											termNode("l_paren", "("),
											nonTermNode("expr",
												nonTermNode("expr",
													nonTermNode("term",
														nonTermNode("factor",
															termNode("id", "b"),
														),
													),
												),
												termNode("add", "+"),
												nonTermNode("term",
													nonTermNode("factor",
														termNode("id", "c"),
													),
												),
											),
											termNode("r_paren", ")"),
										),
									),
								),
								termNode("r_paren", ")"),
							),
						),
						termNode("mul", "*"),
						nonTermNode("factor",
							termNode("id", "d"),
						),
					),
				),
				termNode("add", "+"),
				nonTermNode("term",
					nonTermNode("factor",
						termNode("id", "e"),
					),
				),
			),
		},
		// Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
		// not allowed in string literals.
		{
			specSrc: `
#name test;

s
    : a b c
    ;

a
    : '\f{foo}';
b
    : '\u{0000}';
c
    : '\p{gc=Letter}';
`,
			src: `\f{foo}\u{0000}\p{gc=Letter}`,
			cst: nonTermNode("s",
				termNode("a", `\f{foo}`),
				termNode("b", `\u{0000}`),
				termNode("c", `\p{gc=Letter}`),
			),
		},
		// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
		{
			specSrc: `
#name test;

s
    : foo bar
    ;
foo
    :
    ;
bar
    : bar_text
    |
    ;
bar_text: "bar";
`,
			src: ``,
			cst: nonTermNode("s",
				nonTermNode("foo"),
				nonTermNode("bar"),
			),
		},
		// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
		{
			specSrc: `
#name test;

s
    : foo bar
    ;
foo
    :
    ;
bar
    : bar_text
    |
    ;

bar_text
    : "bar";
`,
			src: `bar`,
			cst: nonTermNode("s",
				nonTermNode("foo"),
				nonTermNode("bar",
					termNode("bar_text", "bar"),
				),
			),
		},
		// A production can have multiple alternative productions.
		{
			specSrc: `
#name test;

#prec (
    #assign $uminus
    #left mul div
    #left add sub
);

expr
    : expr add expr
    | expr sub expr
    | expr mul expr
    | expr div expr
    | int
    | sub int #prec $uminus // This 'sub' means the unary minus symbol.
    ;

int
    : "0|[1-9][0-9]*";
add
    : '+';
sub
    : '-';
mul
    : '*';
div
    : '/';
`,
			src: `-1*-2+3-4/5`,
			ast: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						nonTermNode("expr",
							termNode("sub", "-"),
							termNode("int", "1"),
						),
						termNode("mul", "*"),
						nonTermNode("expr",
							termNode("sub", "-"),
							termNode("int", "2"),
						),
					),
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("int", "3"),
					),
				),
				termNode("sub", "-"),
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("int", "4"),
					),
					termNode("div", "/"),
					nonTermNode("expr",
						termNode("int", "5"),
					),
				),
			),
		},
		// A lexical production can have multiple production directives.
		{
			specSrc: `
#name test;

s
    : push_a push_b pop pop
    ;

push_a #mode default #push a
    : '->a';
push_b #mode a #push b
    : '->b';
pop #mode a b #pop
    : '<-';
`,
			src: `->a->b<-<-`,
			ast: nonTermNode("s",
				termNode("push_a", "->a"),
				termNode("push_b", "->b"),
				termNode("pop", "<-"),
				termNode("pop", "<-"),
			),
		},
		{
			specSrc: `
#name test;

mode_tran_seq
    : mode_tran_seq mode_tran
    | mode_tran
    ;
mode_tran
    : push_m1
    | push_m2
    | pop_m1
    | pop_m2
    ;

push_m1 #push m1
    : "->";
push_m2 #mode m1 #push m2
    : "-->";
pop_m1 #mode m1 #pop
    : "<-";
pop_m2 #mode m2 #pop
    : "<--";
whitespace #mode default m1 m2 #skip
    : "\u{0020}+";
`,
			src: ` -> --> <-- <- `,
		},
		{
			specSrc: `
#name test;

s
    : foo bar
    ;

foo
    : "foo";
bar #mode default
    : "bar";
`,
			src: `foobar`,
		},
		// When #push and #pop are applied to the same symbol, #pop will run first, then #push.
		{
			specSrc: `
#name test;

s
    : foo bar baz
    ;

foo #push m1
    : 'foo';
bar #mode m1 #pop #push m2
    : 'bar';
baz #mode m2
    : 'baz';
`,
			src: `foobarbaz`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
				termNode("bar", "bar"),
				termNode("baz", "baz"),
			),
		},
		// When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first
		// in a definition. That is, the order in which #push and #pop appear in grammar has nothing to do with the order in which
		// they are executed.
		{
			specSrc: `
#name test;

s
    : foo bar baz
    ;

foo #push m1
    : 'foo';
bar #mode m1 #push m2 #pop
    : 'bar';
baz #mode m2
    : 'baz';
`,
			src: `foobarbaz`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
				termNode("bar", "bar"),
				termNode("baz", "baz"),
			),
		},
		// The parser can skips specified tokens.
		{
			specSrc: `
#name test;

s
    : foo bar
    ;

foo
    : "foo";
bar
    : "bar";
white_space #skip
    : "[\u{0009}\u{0020}]+";
`,
			src: `foo bar`,
		},
		// A grammar can contain fragments.
		{
			specSrc: `
#name test;

s
    : tagline
    ;
tagline
    : "\f{words} IS OUT THERE.";
fragment words
    : "[A-Za-z\u{0020}]+";
`,
			src: `THE TRUTH IS OUT THERE.`,
		},
		// A grammar can contain ast actions.
		{
			specSrc: `
#name test;

list
    : l_bracket elems r_bracket #ast elems...
    ;
elems
    : elems comma id #ast elems... id
    | id
    ;

whitespace #skip
    : "\u{0020}+";
l_bracket
    : '[';
r_bracket
    : ']';
comma
    : ',';
id
    : "[A-Za-z]+";
`,
			src: `[Byers, Frohike, Langly]`,
			cst: nonTermNode("list",
				termNode("x_1", "["),
				nonTermNode("elems",
					nonTermNode("elems",
						nonTermNode("elems",
							termNode("id", "Byers"),
						),
						termNode("x_3", ","),
						termNode("id", "Frohike"),
					),
					termNode("x_3", ","),
					termNode("id", "Langly"),
				),
				termNode("x_2", "]"),
			),
			ast: nonTermNode("list",
				termNode("id", "Byers"),
				termNode("id", "Frohike"),
				termNode("id", "Langly"),
			),
		},
		// The '...' operator can expand child nodes.
		{
			specSrc: `
#name test;

s
    : a #ast a...
    ;
a
    : a comma foo #ast a... foo
    | foo
    ;

comma
    : ',';
foo
    : 'foo';
`,
			src: `foo,foo,foo`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
				termNode("foo", "foo"),
				termNode("foo", "foo"),
			),
		},
		// The '...' operator also can applied to an element having no children.
		{
			specSrc: `
#name test;

s
    : a semi_colon #ast a...
    ;
a
    :
    ;

semi_colon
    : ';';
`,
			src: `;`,
			ast: nonTermNode("s"),
		},
		// A label can be a parameter of #ast directive.
		{
			specSrc: `
#name test;

#prec (
    #left add sub
);

expr
    : expr@lhs add expr@rhs #ast add lhs rhs
    | expr@lhs sub expr@rhs #ast sub lhs rhs
    | num
    ;

add
    : '+';
sub
    : '-';
num
    : "0|[1-9][0-9]*";
`,
			src: `1+2-3`,
			ast: nonTermNode("expr",
				termNode("sub", "-"),
				nonTermNode("expr",
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("num", "1"),
					),
					nonTermNode("expr",
						termNode("num", "2"),
					),
				),
				nonTermNode("expr",
					termNode("num", "3"),
				),
			),
		},
		// An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed.
		{
			specSrc: `
#name test;

s
    : foo@x semi_colon #ast foo
    ;

semi_colon
    : ';';
foo
    : 'foo';
`,
			src: `foo;`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
			),
		},
		// A production has the same precedence and associativity as the right-most terminal symbol.
		{
			specSrc: `
#name test;

#prec (
    #left add
);

expr
    : expr add expr // This alternative has the same precedence and associativiry as 'add'.
    | int
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
int
    : "0|[1-9][0-9]*";
add
    : '+';
`,
			// This source is recognized as the following structure because the production `expr → expr add expr` has the same
			// precedence and associativity as the symbol 'add'.
			//
			// ((1+2)+3)
			//
			// If the symbol doesn't have the precedence and left associativity, the production also doesn't have the precedence
			// and associativity and this source will be recognized as the following structure.
			//
			// (1+(2+3))
			src: `1+2+3`,
			ast: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("int", "1"),
					),
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("int", "2"),
					),
				),
				termNode("add", "+"),
				nonTermNode("expr",
					termNode("int", "3"),
				),
			),
		},
		// The 'prec' directive can set precedence of a production.
		{
			specSrc: `
#name test;

#prec (
    #assign $uminus
    #left mul div
    #left add sub
);

expr
    : expr add expr
    | expr sub expr
    | expr mul expr
    | expr div expr
    | int
    | sub int #prec $uminus // This 'sub' means a unary minus symbol.
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
int
    : "0|[1-9][0-9]*";
add
    : '+';
sub
    : '-';
mul
    : '*';
div
    : '/';
`,
			// This source is recognized as the following structure because the production `expr → sub expr`
			// has the `#prec mul` directive and has the same precedence of the symbol `mul`.
			//
			// (((-1) * 20) / 5)
			//
			// If the production doesn't have the `#prec` directive, this source will be recognized as
			// the following structure.
			//
			// (- ((1 * 20) / 5))
			src: `-1*20/5`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("sub", "-"),
						termNode("int", "1"),
					),
					termNode("mul", "*"),
					nonTermNode("expr",
						termNode("int", "20"),
					),
				),
				termNode("div", "/"),
				nonTermNode("expr",
					termNode("int", "5"),
				),
			),
		},
		// The grammar can contain the 'error' symbol.
		{
			specSrc: `
#name test;

s
    : id id id semi_colon
    | error semi_colon
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
id
    : "[A-Za-z_]+";
`,
			src: `foo bar baz ;`,
		},
		// The 'error' symbol can appear in an #ast directive.
		{
			specSrc: `
#name test;

s
    : foo semi_colon
    | error semi_colon #ast error
    ;

semi_colon
    : ';';
foo
    : 'foo';
`,
			src:    `bar;`,
			synErr: true,
			ast: nonTermNode("s",
				errorNode(),
			),
		},
		// The 'error' symbol can have a label, and an #ast can reference it.
		{
			specSrc: `
#name test;

s
    : foo semi_colon
    | error@e semi_colon #ast e
    ;

semi_colon
    : ';';
foo
    : 'foo';
`,
			src:    `bar;`,
			synErr: true,
			ast: nonTermNode("s",
				errorNode(),
			),
		},
		// The grammar can contain the 'recover' directive.
		{
			specSrc: `
#name test;

seq
    : seq elem
    | elem
    ;
elem
    : id id id semi_colon
    | error semi_colon #recover
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
id
    : "[A-Za-z_]+";
`,
			src: `a b c ; d e f ;`,
		},
		// The same label can be used between different alternatives.
		{
			specSrc: `
#name test;

s
    : foo@x bar
    | foo@x
    ;

foo: 'foo';
bar: 'bar';
`,
			src: `foo`,
		},
	}

	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
			if err != nil {
				t.Fatal(err)
			}

			b := grammar.GrammarBuilder{
				AST: ast,
			}
			cg, _, err := b.Build()
			if err != nil {
				t.Fatal(err)
			}

			toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}

			gram := NewGrammar(cg)
			tb := NewDefaultSyntaxTreeBuilder()
			var opt []ParserOption
			switch {
			case tt.ast != nil:
				opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
			case tt.cst != nil:
				opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
			}
			p, err := NewParser(toks, gram, opt...)
			if err != nil {
				t.Fatal(err)
			}

			err = p.Parse()
			if err != nil {
				t.Fatal(err)
			}

			if !tt.synErr && len(p.SyntaxErrors()) > 0 {
				for _, synErr := range p.SyntaxErrors() {
					t.Fatalf("unexpected syntax errors occurred: %v", synErr)
				}
			}

			switch {
			case tt.ast != nil:
				testTree(t, tb.Tree(), tt.ast)
			case tt.cst != nil:
				testTree(t, tb.Tree(), tt.cst)
			}
		})
	}
}

func testTree(t *testing.T, node, expected *Node) {
	t.Helper()

	if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
		t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
	}
	if len(node.Children) != len(expected.Children) {
		t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
	}
	for i, c := range node.Children {
		testTree(t, c, expected.Children[i])
	}
}

type testSemAct struct {
	gram   *spec.CompiledGrammar
	actLog []string
}

func (a *testSemAct) Shift(tok VToken, recovered bool) {
	t := a.gram.Syntactic.Terminals[tok.TerminalID()]
	if recovered {
		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v/recovered", t))
	} else {
		a.actLog = append(a.actLog, fmt.Sprintf("shift/%v", t))
	}
}

func (a *testSemAct) Reduce(prodNum int, recovered bool) {
	lhsSym := a.gram.Syntactic.LHSSymbols[prodNum]
	lhsText := a.gram.Syntactic.NonTerminals[lhsSym]
	if recovered {
		a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v/recovered", lhsText))
	} else {
		a.actLog = append(a.actLog, fmt.Sprintf("reduce/%v", lhsText))
	}
}

func (a *testSemAct) Accept() {
	a.actLog = append(a.actLog, "accept")
}

func (a *testSemAct) TrapAndShiftError(cause VToken, popped int) {
	a.actLog = append(a.actLog, fmt.Sprintf("trap/%v/shift/error", popped))
}

func (a *testSemAct) MissError(cause VToken) {
	a.actLog = append(a.actLog, "miss")
}

func TestParserWithSemanticAction(t *testing.T) {
	specSrcWithErrorProd := `
#name test;

seq
    : seq elem semicolon
	| elem semicolon
    | error star star semicolon
	| error semicolon #recover
	;
elem
    : char char char
	;

ws #skip
    : "[\u{0009}\u{0020}]+";
semicolon
    : ';';
star
    : '*';
char
    : "[a-z]";
`

	specSrcWithoutErrorProd := `
#name test;

seq
    : seq elem semicolon
	| elem semicolon
	;
elem
    : char char char
	;

ws #skip
    : "[\u{0009}\u{0020}]+";
semicolon
    : ';';
char
    : "[a-z]";
`

	tests := []struct {
		caption string
		specSrc string
		src     string
		actLog  []string
	}{
		{
			caption: "when an input contains no syntax error, the driver calls `Shift`, `Reduce`, and `Accept`.",
			specSrc: specSrcWithErrorProd,
			src:     `a b c; d e f;`,
			actLog: []string{
				"shift/char",
				"shift/char",
				"shift/char",
				"reduce/elem",
				"shift/semicolon",
				"reduce/seq",

				"shift/char",
				"shift/char",
				"shift/char",
				"reduce/elem",
				"shift/semicolon",
				"reduce/seq",

				"accept",
			},
		},
		{
			caption: "when a grammar has `error` symbol, the driver calls `TrapAndShiftError`.",
			specSrc: specSrcWithErrorProd,
			src:     `a; b !; c d !; e ! * *; h i j;`,
			actLog: []string{
				"shift/char",
				"trap/1/shift/error",
				"shift/semicolon",
				"reduce/seq/recovered",

				"shift/char",
				"trap/2/shift/error",
				"shift/semicolon",
				"reduce/seq/recovered",

				"shift/char",
				"shift/char",
				"trap/3/shift/error",
				"shift/semicolon",
				"reduce/seq/recovered",

				"shift/char",
				"trap/2/shift/error",
				"shift/star",
				"shift/star",
				// When the driver shifts three times, it recovers from an error.
				"shift/semicolon/recovered",
				"reduce/seq",

				"shift/char",
				"shift/char",
				"shift/char",
				"reduce/elem",
				"shift/semicolon",
				"reduce/seq",

				// Even if the input contains syntax errors, the driver calls `Accept` when the input is accepted
				// according to the error production.
				"accept",
			},
		},
		{
			caption: "when the input doesn't meet the error production, the driver calls `MissError`.",
			specSrc: specSrcWithErrorProd,
			src:     `a !`,
			actLog: []string{
				"shift/char",
				"trap/1/shift/error",

				"miss",
			},
		},
		{
			caption: "when a syntax error isn't trapped, the driver calls `MissError`.",
			specSrc: specSrcWithoutErrorProd,
			src:     `a !`,
			actLog: []string{
				"shift/char",

				"miss",
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
			if err != nil {
				t.Fatal(err)
			}

			b := grammar.GrammarBuilder{
				AST: ast,
			}
			gram, _, err := b.Build()
			if err != nil {
				t.Fatal(err)
			}

			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}

			semAct := &testSemAct{
				gram: gram,
			}
			p, err := NewParser(toks, NewGrammar(gram), SemanticAction(semAct))
			if err != nil {
				t.Fatal(err)
			}

			err = p.Parse()
			if err != nil {
				t.Fatal(err)
			}

			if len(semAct.actLog) != len(tt.actLog) {
				t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
			}

			for i, e := range tt.actLog {
				if semAct.actLog[i] != e {
					t.Fatalf("unexpected action log; want: %+v, got: %+v", tt.actLog, semAct.actLog)
				}
			}
		})
	}
}

func TestParserWithSyntaxErrors(t *testing.T) {
	tests := []struct {
		caption     string
		specSrc     string
		src         string
		synErrCount int
	}{
		{
			caption: "the parser can report a syntax error",
			specSrc: `
#name test;

s
    : foo
    ;

foo
    : 'foo';
`,
			src:         `bar`,
			synErrCount: 1,
		},
		{
			caption: "when the parser reduced a production having the reduce directive, the parser will recover from an error state",
			specSrc: `
#name test;

seq
    : seq elem semi_colon
	| elem semi_colon
	| error semi_colon #recover
	;
elem
    : a b c
	;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
a
    : 'a';
b
    : 'b';
c
    : 'c';
`,
			src:         `!; a!; ab!;`,
			synErrCount: 3,
		},
		{
			caption: "After the parser shifts the error symbol, symbols are ignored until a symbol the parser can perform shift appears",
			specSrc: `
#name test;

seq
    : seq elem semi_colon
	| elem semi_colon
	| error semi_colon #recover
	;
elem
    : a b c
	;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
a
    : 'a';
b
    : 'b';
c
    : 'c';
`,
			// After the parser trasits to the error state reading the first invalid symbol ('!'),
			// the second and third invalid symbols ('!') are ignored.
			src:         `! ! !; a!; ab!;`,
			synErrCount: 3,
		},
		{
			caption: "when the parser performs shift three times, the parser recovers from the error state",
			specSrc: `
#name test;

seq
    : seq elem semi_colon
	| elem semi_colon
	| error star star semi_colon
	;
elem
    : a b c
	;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
star
    : '*';
a
    : 'a';
b
    : 'b';
c
    : 'c';
`,
			src:         `!**; a!**; ab!**; abc!`,
			synErrCount: 4,
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
			if err != nil {
				t.Fatal(err)
			}

			b := grammar.GrammarBuilder{
				AST: ast,
			}
			gram, _, err := b.Build()
			if err != nil {
				t.Fatal(err)
			}

			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}

			p, err := NewParser(toks, NewGrammar(gram))
			if err != nil {
				t.Fatal(err)
			}

			err = p.Parse()
			if err != nil {
				t.Fatal(err)
			}

			synErrs := p.SyntaxErrors()
			if len(synErrs) != tt.synErrCount {
				t.Fatalf("unexpected syntax error; want: %v error(s), got: %v error(s)", tt.synErrCount, len(synErrs))
			}
		})
	}
}

func TestParserWithSyntaxErrorAndExpectedLookahead(t *testing.T) {
	tests := []struct {
		caption  string
		specSrc  string
		src      string
		cause    string
		expected []string
	}{
		{
			caption: "the parser reports an expected lookahead symbol",
			specSrc: `
#name test;

s
    : foo
    ;

foo
    : 'foo';
`,
			src:   `bar`,
			cause: `bar`,
			expected: []string{
				"foo",
			},
		},
		{
			caption: "the parser reports expected lookahead symbols",
			specSrc: `
#name test;

s
    : foo
    | bar
    ;

foo
    : 'foo';
bar
    : 'bar';
`,
			src:   `baz`,
			cause: `baz`,
			expected: []string{
				"foo",
				"bar",
			},
		},
		{
			caption: "the parser may report the EOF as an expected lookahead symbol",
			specSrc: `
#name test;

s
    : foo
    ;

foo
    : 'foo';
`,
			src:   `foobar`,
			cause: `bar`,
			expected: []string{
				"<eof>",
			},
		},
		{
			caption: "the parser may report the EOF and others as expected lookahead symbols",
			specSrc: `
#name test;

s
    : foo
    |
    ;

foo
    : 'foo';
`,
			src:   `bar`,
			cause: `bar`,
			expected: []string{
				"foo",
				"<eof>",
			},
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
			ast, err := parser.Parse(strings.NewReader(tt.specSrc))
			if err != nil {
				t.Fatal(err)
			}

			b := grammar.GrammarBuilder{
				AST: ast,
			}
			gram, _, err := b.Build()
			if err != nil {
				t.Fatal(err)
			}

			toks, err := NewTokenStream(gram, strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}

			p, err := NewParser(toks, NewGrammar(gram))
			if err != nil {
				t.Fatal(err)
			}

			err = p.Parse()
			if err != nil {
				t.Fatal(err)
			}

			synErrs := p.SyntaxErrors()
			if synErrs == nil {
				t.Fatalf("expected one syntax error, but it didn't occur")
			}
			if len(synErrs) != 1 {
				t.Fatalf("too many syntax errors: %v errors", len(synErrs))
			}
			synErr := synErrs[0]
			if string(synErr.Token.Lexeme()) != tt.cause {
				t.Fatalf("unexpected lexeme: want: %v, got: %v", tt.cause, string(synErr.Token.Lexeme()))
			}
			if len(synErr.ExpectedTerminals) != len(tt.expected) {
				t.Fatalf("unexpected lookahead symbols: want: %v, got: %v", tt.expected, synErr.ExpectedTerminals)
			}
			sort.Slice(tt.expected, func(i, j int) bool {
				return tt.expected[i] < tt.expected[j]
			})
			sort.Slice(synErr.ExpectedTerminals, func(i, j int) bool {
				return synErr.ExpectedTerminals[i] < synErr.ExpectedTerminals[j]
			})
			for i, e := range tt.expected {
				if synErr.ExpectedTerminals[i] != e {
					t.Errorf("unexpected lookahead symbol: want: %v, got: %v", e, synErr.ExpectedTerminals[i])
				}
			}
		})
	}
}


func MainTest() {}