1 files changed, 139 insertions, 2 deletions
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 5abe83c..33b206f 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,14 +42,50 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
 	}
 }
 
+func newToken(modeID spec.LexModeID, modeName spec.LexModeName, kindID spec.LexKindID, modeKindID spec.LexModeKindID, kindName spec.LexKindName, match byteSequence) *Token {
+	return &Token{
+		ModeID:     modeID,
+		ModeName:   modeName,
+		KindID:     kindID,
+		ModeKindID: modeKindID,
+		KindName:   kindName,
+		match:      match,
+	}
+}
+
 func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
 	return newToken(spec.LexModeIDDefault, spec.LexModeNameDefault, spec.LexKindID(kindID), spec.LexModeKindID(modeKindID), spec.LexKindName(kindName), match)
 }
 
+func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
+	return &Token{
+		ModeID:     modeID,
+		ModeName:   modeName,
+		ModeKindID: 0,
+		EOF:        true,
+	}
+}
+
 func newEOFTokenDefault() *Token {
 	return newEOFToken(spec.LexModeIDDefault, spec.LexModeNameDefault)
 }
 
+func newInvalidToken(modeID spec.LexModeID, modeName spec.LexModeName, match byteSequence) *Token {
+	return &Token{
+		ModeID:     modeID,
+		ModeName:   modeName,
+		ModeKindID: 0,
+		match:      match,
+		Invalid:    true,
+	}
+}
+
+func withPos(tok *Token, row, col int) *Token {
+	tok.Row = row
+	tok.Col = col
+	return tok
+}
+
 func TestLexer_Next(t *testing.T) {
 	test := []struct {
 		lspec           *spec.LexSpec
@@ -715,7 +751,7 @@ func TestLexer_Next(t *testing.T) {
 						t.Log(err)
 						break
 					}
-					testToken(t, eTok, tok)
+					testToken(t, eTok, tok, false)
 					// t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match(), tok.Text(), tok.EOF, tok.Invalid)
 					if tok.EOF {
 						break
@@ -733,7 +769,102 @@ func TestLexer_Next(t *testing.T) {
 	}
 }
 
-func testToken(t *testing.T, expected, actual *Token) {
+func TestLexer_Next_WithPosition(t *testing.T) {
+	lspec := &spec.LexSpec{
+		Entries: []*spec.LexEntry{
+			newLexEntryDefaultNOP("newline", `\u{000A}+`),
+			newLexEntryDefaultNOP("any", `.`),
+		},
+	}
+
+	clspec, err := compiler.Compile(lspec, compiler.CompressionLevel(compiler.CompressionLevelMax))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	src := string([]byte{
+		0x00,
+		0x7F,
+		0x0A,
+
+		0xC2, 0x80,
+		0xDF, 0xBF,
+		0x0A,
+
+		0xE0, 0xA0, 0x80,
+		0xE0, 0xBF, 0xBF,
+		0xE1, 0x80, 0x80,
+		0xEC, 0xBF, 0xBF,
+		0xED, 0x80, 0x80,
+		0xED, 0x9F, 0xBF,
+		0xEE, 0x80, 0x80,
+		0xEF, 0xBF, 0xBF,
+		0x0A,
+
+		0xF0, 0x90, 0x80, 0x80,
+		0xF0, 0xBF, 0xBF, 0xBF,
+		0xF1, 0x80, 0x80, 0x80,
+		0xF3, 0xBF, 0xBF, 0xBF,
+		0xF4, 0x80, 0x80, 0x80,
+		0xF4, 0x8F, 0xBF, 0xBF,
+		0x0A,
+		0x0A,
+		0x0A,
+	})
+
+	expected := []*Token{
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x00})), 0, 0),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x7F})), 0, 1),
+		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 0, 2),
+
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xC2, 0x80})), 1, 0),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xDF, 0xBF})), 1, 1),
+		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 1, 2),
+
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xA0, 0x80})), 2, 0),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xBF, 0xBF})), 2, 1),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE1, 0x80, 0x80})), 2, 2),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEC, 0xBF, 0xBF})), 2, 3),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x80, 0x80})), 2, 4),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x9F, 0xBF})), 2, 5),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEE, 0x80, 0x80})), 2, 6),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEF, 0xBF, 0xBF})), 2, 7),
+		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 2, 8),
+
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0x90, 0x80, 0x80})), 3, 0),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0xBF, 0xBF, 0xBF})), 3, 1),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF1, 0x80, 0x80, 0x80})), 3, 2),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF3, 0xBF, 0xBF, 0xBF})), 3, 3),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x80, 0x80, 0x80})), 3, 4),
+		withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x8F, 0xBF, 0xBF})), 3, 5),
+
+		// When a token contains multiple line breaks, the driver sets the token position to
+		// the line number where a lexeme first appears.
+		withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A, 0x0A, 0x0A})), 3, 6),
+
+		withPos(newEOFTokenDefault(), 0, 0),
+	}
+
+	lexer, err := NewLexer(clspec, strings.NewReader(src))
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+
+	for _, eTok := range expected {
+		tok, err := lexer.Next()
+		if err != nil {
+			t.Fatal(err)
+		}
+
+		testToken(t, eTok, tok, true)
+
+		if tok.EOF {
+			break
+		}
+	}
+}
+
+func testToken(t *testing.T, expected, actual *Token, checkPosition bool) {
 	t.Helper()
 
 	if actual.ModeID != expected.ModeID ||
@@ -746,4 +877,10 @@ func testToken(t *testing.T, expected, actual *Token) {
 		actual.Invalid != expected.Invalid {
 		t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
 	}
+
+	if checkPosition {
+		if actual.Row != expected.Row || actual.Col != expected.Col {
+			t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+		}
+	}
 }