author     Ryo Nihei <nihei.dev@gmail.com>  2021-04-17 22:51:06 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-04-17 22:51:06 +0900
commit     88f83624dc6d7c3b66a34c7c3f414719530e421f (patch)
tree       31c0d8c966a4eaf98dd1670855298a8b4e0969c2 /driver/lexer_test.go
parent     Change the lexical specs of regexp and define concrete syntax error values (diff)
Add validation of lexical specs and improve error messages
Diffstat (limited to 'driver/lexer_test.go')

 -rw-r--r--  driver/lexer_test.go | 63
 1 file changed, 31 insertions(+), 32 deletions(-)
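Two patterns run through the diff below: the tests now build spec.LexEntry values through a local newLexEntry helper rather than calling spec.NewLexEntry directly (likely because the constructor changed along with the new validation), and the previously anonymous entries in TestLexer_PeekN are given kind names ("t1", "t2"), which suggests the new validation rejects entries with an empty kind. A minimal, hypothetical sketch of how that validation might surface to a caller (the compiler import path and the error behavior are assumptions, not shown in this diff):

    package main

    import (
        "fmt"

        "github.com/nihei9/maleeni/compiler"
        "github.com/nihei9/maleeni/spec"
    )

    func main() {
        // An entry with an empty kind is assumed to fail the new spec validation.
        _, err := compiler.Compile(&spec.LexSpec{
            Entries: []*spec.LexEntry{
                {Kind: spec.LexKind(""), Pattern: spec.LexPattern("foo")},
            },
        })
        if err != nil {
            fmt.Println(err) // assumed: one of the improved, more specific error messages
        }
    }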
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 1f3841b..68830a5 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -10,6 +10,13 @@ import (
     "github.com/nihei9/maleeni/spec"
 )
 
+func newLexEntry(kind string, pattern string) *spec.LexEntry {
+    return &spec.LexEntry{
+        Kind:    spec.LexKind(kind),
+        Pattern: spec.LexPattern(pattern),
+    }
+}
+
 func TestLexer_Next(t *testing.T) {
     test := []struct {
         lspec *spec.LexSpec
@@ -19,8 +26,8 @@ func TestLexer_Next(t *testing.T) {
         {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
-                    spec.NewLexEntry("t1", "(a|b)*abb"),
-                    spec.NewLexEntry("t2", " +"),
+                    newLexEntry("t1", "(a|b)*abb"),
+                    newLexEntry("t2", " +"),
                 },
             },
             src: "abb aabb aaabb babb bbabb abbbabb",
@@ -42,9 +49,9 @@ func TestLexer_Next(t *testing.T) {
         {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
-                    spec.NewLexEntry("t1", "b?a+"),
-                    spec.NewLexEntry("t2", "(ab)?(cd)+"),
-                    spec.NewLexEntry("t3", " +"),
+                    newLexEntry("t1", "b?a+"),
+                    newLexEntry("t2", "(ab)?(cd)+"),
+                    newLexEntry("t3", " +"),
                 },
             },
             src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
@@ -70,7 +77,7 @@ func TestLexer_Next(t *testing.T) {
         {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
-                    spec.NewLexEntry("t1", "."),
+                    newLexEntry("t1", "."),
                 },
             },
             src: string([]byte{
@@ -114,7 +121,7 @@ func TestLexer_Next(t *testing.T) {
         {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
-                    spec.NewLexEntry("t1", "[ab.*+?|()[\\]]"),
+                    newLexEntry("t1", "[ab.*+?|()[\\]]"),
                 },
             },
             src: "ab.*+?|()[]",
@@ -142,7 +149,7 @@ func TestLexer_Next(t *testing.T) {
                     // maleeni cannot handle the null character in patterns because compiler.lexer,
                     // specifically read() and restore(), recognizes the null characters as that a symbol doesn't exist.
                     // There is room for improvement in this behavior of the lexer.
-                    spec.NewLexEntry("1ByteChar", "[\x01-\x7f]"),
+                    newLexEntry("1ByteChar", "[\x01-\x7f]"),
                 },
             },
             src: string([]byte{
@@ -163,7 +170,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // all 2 byte characters
-                    spec.NewLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+                    newLexEntry("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
                 },
             },
             src: string([]byte{
@@ -184,7 +191,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // All bytes are the same.
-                    spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+                    newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
                 },
             },
             src: string([]byte{
@@ -199,7 +206,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // The first two bytes are the same.
-                    spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+                    newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
                 },
             },
             src: string([]byte{
@@ -220,7 +227,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // The first byte are the same.
-                    spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+                    newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
                 },
             },
             src: string([]byte{
@@ -241,7 +248,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // all 3 byte characters
-                    spec.NewLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+                    newLexEntry("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
                 },
             },
             src: string([]byte{
@@ -286,7 +293,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // All bytes are the same.
-                    spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+                    newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
                 },
             },
             src: string([]byte{
@@ -301,7 +308,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // The first 3 bytes are the same.
-                    spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+                    newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
                 },
             },
             src: string([]byte{
@@ -322,7 +329,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // The first 2 bytes are the same.
-                    spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+                    newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
                 },
             },
             src: string([]byte{
@@ -343,7 +350,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // The first byte are the same.
-                    spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+                    newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
                 },
             },
             src: string([]byte{
@@ -364,7 +371,7 @@ func TestLexer_Next(t *testing.T) {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
                     // all 4 byte characters
-                    spec.NewLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+                    newLexEntry("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
                 },
             },
             src: string([]byte{
@@ -400,7 +407,7 @@ func TestLexer_Next(t *testing.T) {
         {
             lspec: &spec.LexSpec{
                 Entries: []*spec.LexEntry{
-                    spec.NewLexEntry("NonNumber", "[^0-9]+[0-9]"),
+                    newLexEntry("NonNumber", "[^0-9]+[0-9]"),
                 },
             },
             src: "foo9",
@@ -439,8 +446,8 @@ func TestLexer_PeekN(t *testing.T) {
     clspec, err := compiler.Compile(&spec.LexSpec{
         Entries: []*spec.LexEntry{
-            spec.NewLexEntry("", "foo"),
-            spec.NewLexEntry("", "bar"),
+            newLexEntry("t1", "foo"),
+            newLexEntry("t2", "bar"),
         },
     })
     if err != nil {
@@ -452,17 +459,9 @@ func TestLexer_PeekN(t *testing.T) {
     }
 
     expectedTokens := []*Token{
-        {
-            ID:    1,
-            Match: newByteSequence([]byte("foo")),
-        },
-        {
-            ID:    2,
-            Match: newByteSequence([]byte("bar")),
-        },
-        {
-            EOF: true,
-        },
+        newToken(1, "t1", []byte("foo")),
+        newToken(2, "t2", []byte("bar")),
+        newEOFToken(),
     }
 
     tok, err := lex.Peek1()