package driver

import (
	"fmt"
	"strings"
	"testing"

	"github.com/nihei9/vartan/grammar"
	spec "github.com/nihei9/vartan/spec/grammar"
)

func termNode(kind string, text string, children ...*Node) *Node {
	return &Node{
		Type:     NodeTypeTerminal,
		KindName: kind,
		Text:     text,
		Children: children,
	}
}

func errorNode() *Node {
	return &Node{
		Type:     NodeTypeError,
		KindName: "error",
	}
}

func nonTermNode(kind string, children ...*Node) *Node {
	return &Node{
		Type:     NodeTypeNonTerminal,
		KindName: kind,
		Children: children,
	}
}

func TestParser_Parse(t *testing.T) {
	tests := []struct {
		specSrc string
		src     string
		synErr  bool
		cst     *Node
		ast     *Node
	}{
		{
			specSrc: `
#name test;

expr
    : expr add term
    | term
    ;
term
    : term mul factor
    | factor
    ;
factor
    : l_paren expr r_paren
    | id
    ;

add
    : '+';
mul
    : '*';
l_paren
    : '(';
r_paren
    : ')';
id
    : "[A-Za-z_][0-9A-Za-z_]*";
`,
			src: `(a+(b+c))*d+e`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("term",
						nonTermNode("term",
							nonTermNode("factor",
								termNode("l_paren", "("),
								nonTermNode("expr",
									nonTermNode("expr",
										nonTermNode("term",
											nonTermNode("factor",
												termNode("id", "a"),
											),
										),
									),
									termNode("add", "+"),
									nonTermNode("term",
										nonTermNode("factor",
											termNode("l_paren", "("),
											nonTermNode("expr",
												nonTermNode("expr",
													nonTermNode("term",
														nonTermNode("factor",
															termNode("id", "b"),
														),
													),
												),
												termNode("add", "+"),
												nonTermNode("term",
													nonTermNode("factor",
														termNode("id", "c"),
													),
												),
											),
											termNode("r_paren", ")"),
										),
									),
								),
								termNode("r_paren", ")"),
							),
						),
						termNode("mul", "*"),
						nonTermNode("factor",
							termNode("id", "d"),
						),
					),
				),
				termNode("add", "+"),
				nonTermNode("term",
					nonTermNode("factor",
						termNode("id", "e"),
					),
				),
			),
		},
		// Fragments (\f{}), code point expressions (\u{}), and character property expressions (\p{}) are
		// not allowed in string literals.
		{
			specSrc: `
#name test;

s
    : a b c
    ;

a
    : '\f{foo}';
b
    : '\u{0000}';
c
    : '\p{gc=Letter}';
`,
			src: `\f{foo}\u{0000}\p{gc=Letter}`,
			cst: nonTermNode("s",
				termNode("a", `\f{foo}`),
				termNode("b", `\u{0000}`),
				termNode("c", `\p{gc=Letter}`),
			),
		},
		// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
		{
			specSrc: `
#name test;

s
    : foo bar
    ;
foo
    :
    ;
bar
    : bar_text
    |
    ;
bar_text: "bar";
`,
			src: ``,
			cst: nonTermNode("s",
				nonTermNode("foo"),
				nonTermNode("bar"),
			),
		},
		// The driver can reduce productions that have the empty alternative and can generate a CST (and AST) node.
		{
			specSrc: `
#name test;

s
    : foo bar
    ;
foo
    :
    ;
bar
    : bar_text
    |
    ;

bar_text
    : "bar";
`,
			src: `bar`,
			cst: nonTermNode("s",
				nonTermNode("foo"),
				nonTermNode("bar",
					termNode("bar_text", "bar"),
				),
			),
		},
		// A production can have multiple alternative productions.
		{
			specSrc: `
#name test;

#prec (
    #assign $uminus
    #left mul div
    #left add sub
);

expr
    : expr add expr
    | expr sub expr
    | expr mul expr
    | expr div expr
    | int
    | sub int #prec $uminus // This 'sub' means the unary minus symbol.
    ;

int
    : "0|[1-9][0-9]*";
add
    : '+';
sub
    : '-';
mul
    : '*';
div
    : '/';
`,
			src: `-1*-2+3-4/5`,
			ast: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						nonTermNode("expr",
							termNode("sub", "-"),
							termNode("int", "1"),
						),
						termNode("mul", "*"),
						nonTermNode("expr",
							termNode("sub", "-"),
							termNode("int", "2"),
						),
					),
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("int", "3"),
					),
				),
				termNode("sub", "-"),
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("int", "4"),
					),
					termNode("div", "/"),
					nonTermNode("expr",
						termNode("int", "5"),
					),
				),
			),
		},
		// A lexical production can have multiple production directives.
		{
			specSrc: `
#name test;

s
    : push_a push_b pop pop
    ;

push_a #mode default #push a
    : '->a';
push_b #mode a #push b
    : '->b';
pop #mode a b #pop
    : '<-';
`,
			src: `->a->b<-<-`,
			ast: nonTermNode("s",
				termNode("push_a", "->a"),
				termNode("push_b", "->b"),
				termNode("pop", "<-"),
				termNode("pop", "<-"),
			),
		},
		{
			specSrc: `
#name test;

mode_tran_seq
    : mode_tran_seq mode_tran
    | mode_tran
    ;
mode_tran
    : push_m1
    | push_m2
    | pop_m1
    | pop_m2
    ;

push_m1 #push m1
    : "->";
push_m2 #mode m1 #push m2
    : "-->";
pop_m1 #mode m1 #pop
    : "<-";
pop_m2 #mode m2 #pop
    : "<--";
whitespace #mode default m1 m2 #skip
    : "\u{0020}+";
`,
			src: ` -> --> <-- <- `,
		},
		{
			specSrc: `
#name test;

s
    : foo bar
    ;

foo
    : "foo";
bar #mode default
    : "bar";
`,
			src: `foobar`,
		},
		// When #push and #pop are applied to the same symbol, #pop will run first, then #push.
		{
			specSrc: `
#name test;

s
    : foo bar baz
    ;

foo #push m1
    : 'foo';
bar #mode m1 #pop #push m2
    : 'bar';
baz #mode m2
    : 'baz';
`,
			src: `foobarbaz`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
				termNode("bar", "bar"),
				termNode("baz", "baz"),
			),
		},
		// When #push and #pop are applied to the same symbol, #pop will run first, then #push, even if #push appears first
		// in a definition. That is, the order in which #push and #pop appear in grammar has nothing to do with the order in which
		// they are executed.
		{
			specSrc: `
#name test;

s
    : foo bar baz
    ;

foo #push m1
    : 'foo';
bar #mode m1 #push m2 #pop
    : 'bar';
baz #mode m2
    : 'baz';
`,
			src: `foobarbaz`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
				termNode("bar", "bar"),
				termNode("baz", "baz"),
			),
		},
		// The parser can skips specified tokens.
		{
			specSrc: `
#name test;

s
    : foo bar
    ;

foo
    : "foo";
bar
    : "bar";
white_space #skip
    : "[\u{0009}\u{0020}]+";
`,
			src: `foo bar`,
		},
		// A grammar can contain fragments.
		{
			specSrc: `
#name test;

s
    : tagline
    ;
tagline
    : "\f{words} IS OUT THERE.";
fragment words
    : "[A-Za-z\u{0020}]+";
`,
			src: `THE TRUTH IS OUT THERE.`,
		},
		// A grammar can contain ast actions.
		{
			specSrc: `
#name test;

list
    : l_bracket elems r_bracket #ast elems...
    ;
elems
    : elems comma id #ast elems... id
    | id
    ;

whitespace #skip
    : "\u{0020}+";
l_bracket
    : '[';
r_bracket
    : ']';
comma
    : ',';
id
    : "[A-Za-z]+";
`,
			src: `[Byers, Frohike, Langly]`,
			cst: nonTermNode("list",
				termNode("x_1", "["),
				nonTermNode("elems",
					nonTermNode("elems",
						nonTermNode("elems",
							termNode("id", "Byers"),
						),
						termNode("x_3", ","),
						termNode("id", "Frohike"),
					),
					termNode("x_3", ","),
					termNode("id", "Langly"),
				),
				termNode("x_2", "]"),
			),
			ast: nonTermNode("list",
				termNode("id", "Byers"),
				termNode("id", "Frohike"),
				termNode("id", "Langly"),
			),
		},
		// The '...' operator can expand child nodes.
		{
			specSrc: `
#name test;

s
    : a #ast a...
    ;
a
    : a comma foo #ast a... foo
    | foo
    ;

comma
    : ',';
foo
    : 'foo';
`,
			src: `foo,foo,foo`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
				termNode("foo", "foo"),
				termNode("foo", "foo"),
			),
		},
		// The '...' operator also can applied to an element having no children.
		{
			specSrc: `
#name test;

s
    : a semi_colon #ast a...
    ;
a
    :
    ;

semi_colon
    : ';';
`,
			src: `;`,
			ast: nonTermNode("s"),
		},
		// A label can be a parameter of #ast directive.
		{
			specSrc: `
#name test;

#prec (
    #left add sub
);

expr
    : expr@lhs add expr@rhs #ast add lhs rhs
    | expr@lhs sub expr@rhs #ast sub lhs rhs
    | num
    ;

add
    : '+';
sub
    : '-';
num
    : "0|[1-9][0-9]*";
`,
			src: `1+2-3`,
			ast: nonTermNode("expr",
				termNode("sub", "-"),
				nonTermNode("expr",
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("num", "1"),
					),
					nonTermNode("expr",
						termNode("num", "2"),
					),
				),
				nonTermNode("expr",
					termNode("num", "3"),
				),
			),
		},
		// An AST can contain a symbol name, even if the symbol has a label. That is, unused labels are allowed.
		{
			specSrc: `
#name test;

s
    : foo@x semi_colon #ast foo
    ;

semi_colon
    : ';';
foo
    : 'foo';
`,
			src: `foo;`,
			ast: nonTermNode("s",
				termNode("foo", "foo"),
			),
		},
		// A production has the same precedence and associativity as the right-most terminal symbol.
		{
			specSrc: `
#name test;

#prec (
    #left add
);

expr
    : expr add expr // This alternative has the same precedence and associativiry as 'add'.
    | int
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
int
    : "0|[1-9][0-9]*";
add
    : '+';
`,
			// This source is recognized as the following structure because the production `expr → expr add expr` has the same
			// precedence and associativity as the symbol 'add'.
			//
			// ((1+2)+3)
			//
			// If the symbol doesn't have the precedence and left associativity, the production also doesn't have the precedence
			// and associativity and this source will be recognized as the following structure.
			//
			// (1+(2+3))
			src: `1+2+3`,
			ast: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("int", "1"),
					),
					termNode("add", "+"),
					nonTermNode("expr",
						termNode("int", "2"),
					),
				),
				termNode("add", "+"),
				nonTermNode("expr",
					termNode("int", "3"),
				),
			),
		},
		// The 'prec' directive can set precedence of a production.
		{
			specSrc: `
#name test;

#prec (
    #assign $uminus
    #left mul div
    #left add sub
);

expr
    : expr add expr
    | expr sub expr
    | expr mul expr
    | expr div expr
    | int
    | sub int #prec $uminus // This 'sub' means a unary minus symbol.
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
int
    : "0|[1-9][0-9]*";
add
    : '+';
sub
    : '-';
mul
    : '*';
div
    : '/';
`,
			// This source is recognized as the following structure because the production `expr → sub expr`
			// has the `#prec mul` directive and has the same precedence of the symbol `mul`.
			//
			// (((-1) * 20) / 5)
			//
			// If the production doesn't have the `#prec` directive, this source will be recognized as
			// the following structure.
			//
			// (- ((1 * 20) / 5))
			src: `-1*20/5`,
			cst: nonTermNode("expr",
				nonTermNode("expr",
					nonTermNode("expr",
						termNode("sub", "-"),
						termNode("int", "1"),
					),
					termNode("mul", "*"),
					nonTermNode("expr",
						termNode("int", "20"),
					),
				),
				termNode("div", "/"),
				nonTermNode("expr",
					termNode("int", "5"),
				),
			),
		},
		// The grammar can contain the 'error' symbol.
		{
			specSrc: `
#name test;

s
    : id id id semi_colon
    | error semi_colon
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
id
    : "[A-Za-z_]+";
`,
			src: `foo bar baz ;`,
		},
		// The 'error' symbol can appear in an #ast directive.
		{
			specSrc: `
#name test;

s
    : foo semi_colon
    | error semi_colon #ast error
    ;

semi_colon
    : ';';
foo
    : 'foo';
`,
			src:    `bar;`,
			synErr: true,
			ast: nonTermNode("s",
				errorNode(),
			),
		},
		// The 'error' symbol can have a label, and an #ast can reference it.
		{
			specSrc: `
#name test;

s
    : foo semi_colon
    | error@e semi_colon #ast e
    ;

semi_colon
    : ';';
foo
    : 'foo';
`,
			src:    `bar;`,
			synErr: true,
			ast: nonTermNode("s",
				errorNode(),
			),
		},
		// The grammar can contain the 'recover' directive.
		{
			specSrc: `
#name test;

seq
    : seq elem
    | elem
    ;
elem
    : id id id semi_colon
    | error semi_colon #recover
    ;

ws #skip
    : "[\u{0009}\u{0020}]+";
semi_colon
    : ';';
id
    : "[A-Za-z_]+";
`,
			src: `a b c ; d e f ;`,
		},
		// The same label can be used between different alternatives.
		{
			specSrc: `
#name test;

s
    : foo@x bar
    | foo@x
    ;

foo: 'foo';
bar: 'bar';
`,
			src: `foo`,
		},
	}

	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
			ast, err := spec.Parse(strings.NewReader(tt.specSrc))
			if err != nil {
				t.Fatal(err)
			}

			b := grammar.GrammarBuilder{
				AST: ast,
			}
			g, err := b.Build()
			if err != nil {
				t.Fatal(err)
			}

			cg, _, err := grammar.Compile(g)
			if err != nil {
				t.Fatal(err)
			}

			toks, err := NewTokenStream(cg, strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}

			gram := NewGrammar(cg)
			tb := NewDefaultSyntaxTreeBuilder()
			var opt []ParserOption
			switch {
			case tt.ast != nil:
				opt = append(opt, SemanticAction(NewASTActionSet(gram, tb)))
			case tt.cst != nil:
				opt = append(opt, SemanticAction(NewCSTActionSet(gram, tb)))
			}
			p, err := NewParser(toks, gram, opt...)
			if err != nil {
				t.Fatal(err)
			}

			err = p.Parse()
			if err != nil {
				t.Fatal(err)
			}

			if !tt.synErr && len(p.SyntaxErrors()) > 0 {
				for _, synErr := range p.SyntaxErrors() {
					t.Fatalf("unexpected syntax errors occurred: %v", synErr)
				}
			}

			switch {
			case tt.ast != nil:
				testTree(t, tb.Tree(), tt.ast)
			case tt.cst != nil:
				testTree(t, tb.Tree(), tt.cst)
			}
		})
	}
}

func testTree(t *testing.T, node, expected *Node) {
	t.Helper()

	if node.Type != expected.Type || node.KindName != expected.KindName || node.Text != expected.Text {
		t.Fatalf("unexpected node; want: %+v, got: %+v", expected, node)
	}
	if len(node.Children) != len(expected.Children) {
		t.Fatalf("unexpected children; want: %v, got: %v", len(expected.Children), len(node.Children))
	}
	for i, c := range node.Children {
		testTree(t, c, expected.Children[i])
	}
}