aboutsummaryrefslogtreecommitdiff
path: root/spec/lexer_test.go
blob: af586f2fd0b2432639281b258e0a51ae569566aa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
package spec

import (
	"strings"
	"testing"
)

// TestLexer_Run drives the spec lexer over a table of inputs and checks
// that the produced token stream (or the terminating error) matches
// expectations exactly.
func TestLexer_Run(t *testing.T) {
	tests := []struct {
		caption string   // human-readable subtest name
		src     string   // input fed to the lexer
		tokens  []*token // expected token sequence, ending with EOF; nil when err is set
		err     error    // expected error from next(); nil when tokens is set
	}{
		{
			caption: "the lexer can recognize all kinds of tokens",
			src:     `id"terminal":|;@#`,
			tokens: []*token{
				newIDToken("id"),
				newTerminalPatternToken("terminal"),
				newSymbolToken(tokenKindColon),
				newSymbolToken(tokenKindOr),
				newSymbolToken(tokenKindSemicolon),
				newSymbolToken(tokenKindModifierMarker),
				newSymbolToken(tokenKindActionLeader),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer can recognize character sequences and escape sequences in terminal",
			src:     `"abc\"\\"`,
			tokens: []*token{
				newTerminalPatternToken(`abc"\\`),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer ignores line comments",
			src: `
// This is the first comment.
foo
// This is the second comment.
// This is the third comment.
bar // This is the fourth comment.
`,
			tokens: []*token{
				newIDToken("foo"),
				newIDToken("bar"),
				newEOFToken(),
			},
		},
		{
			caption: "an unclosed terminal is not a valid token",
			src:     `"abc`,
			err:     synErrUnclosedTerminal,
		},
		{
			caption: "an incompleted terminal is not a valid token",
			src:     `"\`,
			err:     synErrIncompletedEscSeq,
		},
		{
			caption: "the lexer can recognize valid tokens following an invalid token",
			src:     `abc!!!def`,
			tokens: []*token{
				newIDToken("abc"),
				newInvalidToken("!!!"),
				newIDToken("def"),
				newEOFToken(),
			},
		},
		{
			caption: "the lexer skips white spaces",
			// \u0009: HT
			// \u000A: LF
			// \u000D: CR
			// \u0020: SP
			src: "a\u0020b\u000Ac\u000Dd\u000D\u000Ae\u0009f",
			tokens: []*token{
				newIDToken("a"),
				newIDToken("b"),
				newIDToken("c"),
				newIDToken("d"),
				newIDToken("e"),
				newIDToken("f"),
				newEOFToken(),
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.caption, func(t *testing.T) {
			l, err := newLexer(strings.NewReader(tt.src))
			if err != nil {
				t.Fatal(err)
			}
			n := 0
			for {
				var tok *token
				tok, err = l.next()
				if err != nil {
					break
				}
				// Guard against the lexer emitting more tokens than expected;
				// without this check the index below would panic instead of
				// producing a readable test failure.
				if n >= len(tt.tokens) {
					t.Fatalf("unexpected extra token; want: no more tokens, got: %+v", tok)
				}
				testToken(t, tok, tt.tokens[n])
				n++
				if tok.kind == tokenKindEOF {
					break
				}
			}
			// Every case must terminate with exactly the expected error
			// (nil for the success cases).
			if err != tt.err {
				t.Fatalf("unexpected error; want: %v, got: %v", tt.err, err)
			}
		})
	}
}

// testToken fails the test unless tok matches expected in both kind and text.
func testToken(t *testing.T, tok, expected *token) {
	t.Helper()
	if tok.kind == expected.kind && tok.text == expected.text {
		return
	}
	t.Fatalf("unexpected token; want: %+v, got: %+v", expected, tok)
}