package spec

import (
	"strings"
	"testing"

	verr "github.com/nihei9/vartan/error"
)

func TestLexer_Run(t *testing.T) {
	// Constructors for expected tokens. testToken compares only the kind and
	// the text of a token, so every helper uses a fixed dummy position (1, 0).
	idTok := func(text string) *token {
		return newIDToken(text, newPosition(1, 0))
	}

	termPatTok := func(text string) *token {
		return newTerminalPatternToken(text, newPosition(1, 0))
	}

	strTok := func(text string) *token {
		return newStringLiteralToken(text, newPosition(1, 0))
	}

	symTok := func(kind tokenKind) *token {
		return newSymbolToken(kind, newPosition(1, 0))
	}

	invalidTok := func(text string) *token {
		return newInvalidToken(text, newPosition(1, 0))
	}

	tests := []struct {
		caption string
		src     string
		tokens  []*token
		err     error
	}{
		{
			caption: "the lexer can recognize all kinds of tokens",
			src:     `id"terminal"'string':|;@...#$()`,
			tokens: []*token{
				idTok("id"),
				termPatTok("terminal"),
				strTok(`string`),
				symTok(tokenKindColon),
				symTok(tokenKindOr),
				symTok(tokenKindSemicolon),
				symTok(tokenKindLabelMarker),
				symTok(tokenKindExpantion),
				symTok(tokenKindDirectiveMarker),
				symTok(tokenKindOrderedSymbolMarker),
				symTok(tokenKindLParen),
				symTok(tokenKindRParen),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer can recognize keywords",
			src:     `fragment`,
			tokens: []*token{
				symTok(tokenKindKWFragment),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer can recognize character sequences and escape sequences in a terminal",
			src:     `"abc\"\\"`,
			tokens: []*token{
				termPatTok(`abc"\\`),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer can recognize character sequences and escape sequences in a string literal",
			src:     `'.*+?|()[\'\\'`,
			tokens: []*token{
				strTok(`.*+?|()['\`),
				newEOFToken(),
			},
		},
		{
			caption: "a pattern must include at least one character",
			src:     `""`,
			err:     synErrEmptyPattern,
		},
		{
			caption: "a string must include at least one character",
			src:     `''`,
			err:     synErrEmptyString,
		},
		{
			caption: "the lexer can recognize newlines and combine consecutive newlines into one",
			// \u000A: LF
			// \u000D: CR
			src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A",
			tokens: []*token{
				symTok(tokenKindNewline),
				symTok(tokenKindOr),
				symTok(tokenKindNewline),
				symTok(tokenKindOr),
				symTok(tokenKindNewline),
				symTok(tokenKindOr),
				symTok(tokenKindNewline),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer ignores line comments",
			src: `
// This is the first comment.
foo
// This is the second comment.
// This is the third comment.
bar // This is the fourth comment.
`,
			tokens: []*token{
				symTok(tokenKindNewline),
				idTok("foo"),
				symTok(tokenKindNewline),
				idTok("bar"),
				symTok(tokenKindNewline),
				newEOFToken(),
			},
		},
		{
			caption: "an identifier cannot contain uppercase letters",
			src:     `Abc`,
			err:     synErrIDInvalidChar,
		},
		{
			caption: "an identifier cannot contain uppercase letters",
			src:     `Zyx`,
			err:     synErrIDInvalidChar,
		},
		{
			caption: "an underscore cannot be placed at the beginning of an identifier",
			src:     `_abc`,
			err:     synErrIDInvalidUnderscorePos,
		},
		{
			caption: "an underscore cannot be placed at the end of an identifier",
			src:     `abc_`,
			err:     synErrIDInvalidUnderscorePos,
		},
		{
			caption: "underscores cannot be placed consecutively",
			src:     `a__b`,
			err:     synErrIDConsecutiveUnderscores,
		},
		{
			caption: "digits cannot be placed at the beginning of an identifier",
			src:     `0abc`,
			err:     synErrIDInvalidDigitsPos,
		},
		{
			caption: "digits cannot be placed at the beginning of an identifier",
			src:     `9abc`,
			err:     synErrIDInvalidDigitsPos,
		},
		{
			caption: "an unclosed terminal is not a valid token",
			src:     `"abc`,
			err:     synErrUnclosedTerminal,
		},
		{
			caption: "an incomplete escape sequence in a pattern is not a valid token",
			src:     `"\`,
			err:     synErrIncompletedEscSeq,
		},
		{
			caption: "an unclosed string is not a valid token",
			src:     `'abc`,
			err:     synErrUnclosedString,
		},
		{
			caption: "an incomplete escape sequence in a string is not a valid token",
			src:     `'\`,
			err:     synErrIncompletedEscSeq,
		},
		{
			caption: "the lexer can recognize valid tokens following an invalid token",
			src:     `abc!!!def`,
			tokens: []*token{
				idTok("abc"),
				invalidTok("!!!"),
				idTok("def"),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer skips whitespace",
			// \u0009: HT
			// \u0020: SP
			src: "a\u0009b\u0020c",
			tokens: []*token{
				idTok("a"),
				idTok("b"),
				idTok("c"),
				newEOFToken(),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			l, err := newLexer(strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}
			// Drain tokens until EOF or until the lexer reports an error.
			n := 0
			for {
				var tok *token
				tok, err = l.next()
				if err != nil {
					break
				}
				testToken(t, tok, tt.tokens[n])
				n++
				if tok.kind == tokenKindEOF {
					break
				}
			}
			if tt.err != nil {
				// An expected failure must surface as a SpecError whose cause
				// matches the expected syntax error.
				synErr, ok := err.(*verr.SpecError)
				if !ok {
					t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
				}
				if tt.err != synErr.Cause {
					t.Fatalf("unexpected error; want: %v, got: %v", tt.err, synErr.Cause)
				}
			} else {
				if err != nil {
					t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
				}
			}
		})
	}
}

func testToken(t *testing.T, tok, expected *token) {
	t.Helper()
	if tok.kind != expected.kind || tok.text != expected.text {
		t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok)
	}
}