author     Ryo Nihei <nihei.dev@gmail.com>    2021-09-18 17:07:09 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>    2021-09-18 17:07:09 +0900
commit     fe865a812401c2c612f2cd17cedd4728dc4798f7
tree       007dbc653364809e2273ba95aef0f1bea2006127
parent     Update CHANGELOG
download   tre-fe865a812401c2c612f2cd17cedd4728dc4798f7.tar.gz, tre-fe865a812401c2c612f2cd17cedd4728dc4798f7.tar.xz
Generate constant values representing mode IDs, mode names, kind IDs, and kind names
-rw-r--r--  driver/lexer_test.go | 154
-rw-r--r--  driver/template.go   | 102
-rw-r--r--  spec/spec.go         | 154
-rw-r--r--  spec/spec_test.go    | 211
4 files changed, 515 insertions(+), 106 deletions(-)
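The gist of the change: the generated spec source previously referred to modes and kinds only by bare numeric IDs. After this commit, maleeni-go also emits const blocks naming every mode ID, mode name, kind ID, and kind name, with the Go identifiers derived from the spec's snake_case names via SnakeCaseToUpperCamelCase. As an illustrative sketch only (the mode and kind names below are hypothetical, not taken from this commit), a generated file would gain blocks along these lines:

	const (
		ModeIDNil     ModeID = 0
		ModeIDDefault ModeID = 1
	)

	const (
		ModeNameNil     = ""
		ModeNameDefault = "default"
	)

	const (
		KindIDNil        KindID = 0
		KindIDWord       KindID = 1
		KindIDWhiteSpace KindID = 2
	)

	const (
		KindNameNil        = ""
		KindNameWord       = "word"
		KindNameWhiteSpace = "white_space"
	)

This is presumably also why the test data below renames kinds like 1ByteChar to char1Byte: the new identifier pattern is anchored and rejects names starting with a digit, whereas the old pattern was matched unanchored, so 1ByteChar used to slip through.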
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index ebb4aad..a742bad 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -227,7 +227,7 @@ func TestLexer_Next(t *testing.T) {
 					// maleeni cannot handle the null character in patterns because compiler.lexer,
 					// specifically read() and restore(), treats the null character as indicating that a symbol doesn't exist.
 					// If a pattern needs a null character, use the code point expression \u{0000}.
-					newLexEntryDefaultNOP("1ByteChar", "[\x01-\x7f]"),
+					newLexEntryDefaultNOP("char1Byte", "[\x01-\x7f]"),
 				},
 			},
 			src: string([]byte{
@@ -237,10 +237,10 @@ func TestLexer_Next(t *testing.T) {
 				0x7f,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x01}),
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x02}),
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x7e}),
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x7f}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x01}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x02}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x7e}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x7f}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -248,7 +248,7 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// all 2 byte characters
-					newLexEntryDefaultNOP("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+					newLexEntryDefaultNOP("char2Byte", "[\xc2\x80-\xdf\xbf]"),
 				},
 			},
 			src: string([]byte{
@@ -258,10 +258,10 @@ func TestLexer_Next(t *testing.T) {
 				0xdf, 0xbf,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x80}),
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x81}),
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbe}),
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbf}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x80}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x81}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbe}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbf}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -269,14 +269,14 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// All bytes are the same.
-					newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+					newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
 				},
 			},
 			src: string([]byte{
 				0xe0, 0xa0, 0x80,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -284,7 +284,7 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// The first two bytes are the same.
-					newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+					newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
 				},
 			},
 			src: string([]byte{
@@ -294,10 +294,10 @@ func TestLexer_Next(t *testing.T) {
 				0xe0, 0xa0, 0xbf,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbe}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbf}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -305,7 +305,7 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// The first byte is the same.
- newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), + newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), }, }, src: string([]byte{ @@ -315,10 +315,10 @@ func TestLexer_Next(t *testing.T) { 0xe0, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -326,7 +326,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 3 byte characters - newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"), + newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"), }, }, src: string([]byte{ @@ -348,22 +348,22 @@ func TestLexer_Next(t *testing.T) { 0xef, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -371,14 +371,14 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // All bytes are the same. 
- newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), }, }, src: string([]byte{ 0xf0, 0x90, 0x80, 0x80, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), newEOFTokenDefault(), }, }, @@ -386,7 +386,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first 3 bytes are the same. - newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), }, }, src: string([]byte{ @@ -396,10 +396,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0x90, 0x80, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbf}), newEOFTokenDefault(), }, }, @@ -407,7 +407,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first 2 bytes are the same. - newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), }, }, src: string([]byte{ @@ -417,10 +417,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0x90, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -428,7 +428,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first byte are the same. 
- newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), }, }, src: string([]byte{ @@ -438,10 +438,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0xbf, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -449,7 +449,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 4 byte characters - newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), }, }, src: string([]byte{ @@ -467,18 +467,18 @@ func TestLexer_Next(t *testing.T) { 0xf4, 0x8f, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -497,18 +497,18 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - newLexEntryDefaultNOP("1ByteChar", "\\u{006E}"), - newLexEntryDefaultNOP("2ByteChar", "\\u{03BD}"), - newLexEntryDefaultNOP("3ByteChar", "\\u{306B}"), - newLexEntryDefaultNOP("4ByteChar", "\\u{01F638}"), + newLexEntryDefaultNOP("char1Byte", "\\u{006E}"), + newLexEntryDefaultNOP("char2Byte", "\\u{03BD}"), + newLexEntryDefaultNOP("char3Byte", "\\u{306B}"), + newLexEntryDefaultNOP("char4Byte", "\\u{01F638}"), }, }, src: "nνに😸", tokens: []*Token{ - 
newTokenDefault(1, 1, "1ByteChar", []byte{0x6E}), - newTokenDefault(2, 2, "2ByteChar", []byte{0xCE, 0xBD}), - newTokenDefault(3, 3, "3ByteChar", []byte{0xE3, 0x81, 0xAB}), - newTokenDefault(4, 4, "4ByteChar", []byte{0xF0, 0x9F, 0x98, 0xB8}), + newTokenDefault(1, 1, "char1Byte", []byte{0x6E}), + newTokenDefault(2, 2, "char2Byte", []byte{0xCE, 0xBD}), + newTokenDefault(3, 3, "char3Byte", []byte{0xE3, 0x81, 0xAB}), + newTokenDefault(4, 4, "char4Byte", []byte{0xF0, 0x9F, 0x98, 0xB8}), newEOFTokenDefault(), }, }, diff --git a/driver/template.go b/driver/template.go index f7caa75..2772135 100644 --- a/driver/template.go +++ b/driver/template.go @@ -35,6 +35,67 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { lexerSrc = b.String() } + var modeIDsSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + for i, k := range clspec.ModeNames { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, " ModeIDNil ModeID = %v\n", i) + continue + } + fmt.Fprintf(&b, " ModeID%v ModeID = %v\n", spec.SnakeCaseToUpperCamelCase(k.String()), i) + } + fmt.Fprintf(&b, ")") + + modeIDsSrc = b.String() + } + + var modeNamesSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + for i, k := range clspec.ModeNames { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, " ModeNameNil = %#v\n", "") + continue + } + fmt.Fprintf(&b, " ModeName%v = %#v\n", spec.SnakeCaseToUpperCamelCase(k.String()), k) + } + fmt.Fprintf(&b, ")") + + modeNamesSrc = b.String() + } + + var kindIDsSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + for i, k := range clspec.KindNames { + if i == spec.LexKindIDNil.Int() { + fmt.Fprintf(&b, " KindIDNil KindID = %v\n", i) + continue + } + fmt.Fprintf(&b, " KindID%v KindID = %v\n", spec.SnakeCaseToUpperCamelCase(k.String()), i) + } + fmt.Fprintf(&b, ")") + + kindIDsSrc = b.String() + } + + var kindNamesSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + fmt.Fprintf(&b, " KindNameNil = %#v\n", "") + for _, k := range clspec.KindNames[1:] { + fmt.Fprintf(&b, " KindName%v = %#v\n", spec.SnakeCaseToUpperCamelCase(k.String()), k) + } + fmt.Fprintf(&b, ")") + + kindNamesSrc = b.String() + } + var specSrc string { t, err := template.New("").Funcs(genTemplateFuncs(clspec)).Parse(lexSpecTemplate) @@ -44,8 +105,8 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { var b strings.Builder err = t.Execute(&b, map[string]interface{}{ - "initialModeID": clspec.InitialModeID, - "modeIDNil": spec.LexModeIDNil, + "initialModeID": "ModeID" + spec.SnakeCaseToUpperCamelCase(clspec.ModeNames[clspec.InitialModeID].String()), + "modeIDNil": "ModeIDNil", "modeKindIDNil": spec.LexModeKindIDNil, "stateIDNil": spec.StateIDNil, "compressionLevel": clspec.CompressionLevel, @@ -62,6 +123,14 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { tmpl := `// Code generated by maleeni-go. DO NOT EDIT. 
 
 {{ .lexerSrc }}
 
+{{ .modeIDsSrc }}
+
+{{ .modeNamesSrc }}
+
+{{ .kindIDsSrc }}
+
+{{ .kindNamesSrc }}
+
 {{ .specSrc }}
 `
@@ -72,8 +141,12 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
 
 	var b strings.Builder
 	err = t.Execute(&b, map[string]string{
-		"lexerSrc": lexerSrc,
-		"specSrc":  specSrc,
+		"lexerSrc":     lexerSrc,
+		"modeIDsSrc":   modeIDsSrc,
+		"modeNamesSrc": modeNamesSrc,
+		"kindIDsSrc":   kindIDsSrc,
+		"kindNamesSrc": kindNamesSrc,
+		"specSrc":      specSrc,
 	})
 	if err != nil {
 		return err
@@ -239,11 +312,10 @@ func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap {
 			fmt.Fprintf(&b, "[]string{\n")
 			for i, name := range clspec.ModeNames {
 				if i == spec.LexModeIDNil.Int() {
-					fmt.Fprintf(&b, "%#v,\n", "")
+					fmt.Fprintf(&b, "ModeNameNil,\n")
 					continue
 				}
-
-				fmt.Fprintf(&b, "%#v,\n", name)
+				fmt.Fprintf(&b, "ModeName%v,\n", spec.SnakeCaseToUpperCamelCase(name.String()))
 			}
 			fmt.Fprintf(&b, "}")
 			return b.String()
@@ -290,10 +362,13 @@ func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap {
 					continue
 				}
 
-				fmt.Fprintf(&b, "{")
-				fmt.Fprintf(&b, "%v", ids[0])
-				for _, v := range ids[1:] {
-					fmt.Fprintf(&b, ", %v", v)
+				fmt.Fprintf(&b, "{\n")
+				for j, id := range ids {
+					if j == spec.LexModeKindIDNil.Int() {
+						fmt.Fprintf(&b, "KindIDNil,\n")
+						continue
+					}
+					fmt.Fprintf(&b, "KindID%v,\n", spec.SnakeCaseToUpperCamelCase(string(clspec.KindNames[id].String())))
 				}
 				fmt.Fprintf(&b, "},\n")
 			}
@@ -305,11 +380,10 @@ func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap {
 			fmt.Fprintf(&b, "[]string{\n")
 			for i, name := range clspec.KindNames {
 				if i == spec.LexKindIDNil.Int() {
-					fmt.Fprintf(&b, "%#v,\n", "")
+					fmt.Fprintf(&b, "KindNameNil,\n")
 					continue
 				}
-
-				fmt.Fprintf(&b, "%#v,\n", name)
+				fmt.Fprintf(&b, "KindName%v,\n", spec.SnakeCaseToUpperCamelCase(name.String()))
 			}
 			fmt.Fprintf(&b, "}")
 			return b.String()
diff --git a/spec/spec.go b/spec/spec.go
index 9ac5f4b..62acfc4 100644
--- a/spec/spec.go
+++ b/spec/spec.go
@@ -3,6 +3,7 @@ package spec
 import (
 	"fmt"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 )
@@ -42,19 +43,13 @@ func (k LexKindName) String() string {
 }
 
 func (k LexKindName) validate() error {
-	if k == "" {
-		return fmt.Errorf("kind doesn't allow to be the empty string")
-	}
-	if !lexKindNameRE.Match([]byte(k)) {
-		return fmt.Errorf("kind must be %v", lexKindNamePattern)
+	err := validateIdentifier(k.String())
+	if err != nil {
+		return fmt.Errorf("invalid kind name: %v", err)
 	}
 	return nil
 }
 
-const lexKindNamePattern = "[A-Za-z_][0-9A-Za-z_]*"
-
-var lexKindNameRE = regexp.MustCompile(lexKindNamePattern)
-
 // LexPattern represents a pattern of a lexeme.
 // The pattern is written in regular expression.
 type LexPattern string
@@ -99,19 +94,38 @@ func (m LexModeName) String() string {
 }
 
 func (m LexModeName) validate() error {
-	if m.isNil() || !lexModeNameRE.Match([]byte(m)) {
-		return fmt.Errorf("mode must be %v", lexModeNamePattern)
+	err := validateIdentifier(m.String())
+	if err != nil {
+		return fmt.Errorf("invalid mode name: %v", err)
 	}
 	return nil
 }
 
-func (m LexModeName) isNil() bool {
-	return m == LexModeNameNil
+const idPattern = `^[A-Za-z](_?[0-9A-Za-z]+)*$`
+
+var idRE = regexp.MustCompile(idPattern)
+
+func validateIdentifier(id string) error {
+	if id == "" {
+		return fmt.Errorf("identifier doesn't allow to be the empty string")
+	}
+	if !idRE.MatchString(id) {
+		return fmt.Errorf("identifier must be %v", idPattern)
+	}
+	return nil
 }
 
-const lexModeNamePattern = "[A-Za-z_][0-9A-Za-z_]*"
+func SnakeCaseToUpperCamelCase(snake string) string {
+	elems := strings.Split(snake, "_")
+	for i, e := range elems {
+		if len(e) == 0 {
+			continue
+		}
+		elems[i] = strings.ToUpper(string(e[0])) + e[1:]
+	}
 
-var lexModeNameRE = regexp.MustCompile(lexModeNamePattern)
+	return strings.Join(elems, "")
+}
 
 type LexEntry struct {
 	Kind    LexKindName `json:"kind"`
@@ -185,9 +199,119 @@ func (s *LexSpec) Validate() error {
 			}
 		}
 	}
+	{
+		kinds := []string{}
+		modes := []string{
+			LexModeNameDefault.String(), // This is a predefined mode.
+		}
+		for _, e := range s.Entries {
+			if e.Fragment {
+				continue
+			}
+
+			kinds = append(kinds, e.Kind.String())
+
+			for _, m := range e.Modes {
+				modes = append(modes, m.String())
+			}
+		}
+
+		kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
+		modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
+			if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) {
+				var b strings.Builder
+				fmt.Fprintf(&b, "%+v", ids[0])
+				for _, id := range ids[1:] {
+					fmt.Fprintf(&b, ", %+v", id)
+				}
+				return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String())
+			}
+			return nil
+		})
+		errs := append(kindErrs, modeErrs...)
+		if len(errs) > 0 {
+			var b strings.Builder
+			fmt.Fprintf(&b, "%v", errs[0])
+			for _, err := range errs[1:] {
+				fmt.Fprintf(&b, "\n%v", err)
+			}
+			return fmt.Errorf(b.String())
+		}
+	}
+
 	return nil
 }
 
+func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
+	duplicated := FindSpellingInconsistencies(ids)
+	if len(duplicated) == 0 {
+		return nil
+	}
+
+	var errs []error
+	for _, dup := range duplicated {
+		err := hook(dup)
+		if err != nil {
+			errs = append(errs, err)
+			continue
+		}
+
+		var b strings.Builder
+		fmt.Fprintf(&b, "%+v", dup[0])
+		for _, id := range dup[1:] {
+			fmt.Fprintf(&b, ", %+v", id)
+		}
+		err = fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
+		errs = append(errs, err)
+	}
+
+	return errs
+}
+
+// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
+// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
+// in UpperCamelCase. Thus they are considered to be a spelling inconsistency.
+func FindSpellingInconsistencies(ids []string) [][]string {
+	m := map[string][]string{}
+	for _, id := range removeDuplicates(ids) {
+		c := SnakeCaseToUpperCamelCase(id)
+		m[c] = append(m[c], id)
+	}
+
+	var duplicated [][]string
+	for _, camels := range m {
+		if len(camels) == 1 {
+			continue
+		}
+		duplicated = append(duplicated, camels)
+	}
+
+	for _, dup := range duplicated {
+		sort.Slice(dup, func(i, j int) bool {
+			return dup[i] < dup[j]
+		})
+	}
+	sort.Slice(duplicated, func(i, j int) bool {
+		return duplicated[i][0] < duplicated[j][0]
+	})
+
+	return duplicated
+}
+
+func removeDuplicates(s []string) []string {
+	m := map[string]struct{}{}
+	for _, v := range s {
+		m[v] = struct{}{}
+	}
+
+	var unique []string
+	for v := range m {
+		unique = append(unique, v)
+	}
+
+	return unique
+}
+
 // StateID represents an ID of a state of a transition table.
 type StateID int
diff --git a/spec/spec_test.go b/spec/spec_test.go
new file mode 100644
index 0000000..54d7b7d
--- /dev/null
+++ b/spec/spec_test.go
@@ -0,0 +1,211 @@
+package spec
+
+import (
+	"fmt"
+	"testing"
+)
+
+var idTests = []struct {
+	id      string
+	invalid bool
+}{
+	{
+		id: "foo",
+	},
+	{
+		id: "foo2",
+	},
+	{
+		id: "foo_bar_baz",
+	},
+	{
+		id: "f_o_o",
+	},
+	{
+		id:      "2foo",
+		invalid: true,
+	},
+	{
+		id:      "_foo",
+		invalid: true,
+	},
+	{
+		id:      "foo_",
+		invalid: true,
+	},
+	{
+		id:      "foo__bar",
+		invalid: true,
+	},
+}
+
+func TestValidateIdentifier(t *testing.T) {
+	for _, tt := range idTests {
+		t.Run(tt.id, func(t *testing.T) {
+			err := validateIdentifier(tt.id)
+			if tt.invalid {
+				if err == nil {
+					t.Errorf("expected error didn't occur")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("unexpected error occurred: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func TestLexKindName_validate(t *testing.T) {
+	for _, tt := range idTests {
+		t.Run(tt.id, func(t *testing.T) {
+			err := LexKindName(tt.id).validate()
+			if tt.invalid {
+				if err == nil {
+					t.Errorf("expected error didn't occur")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("unexpected error occurred: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func TestLexModeName_validate(t *testing.T) {
+	for _, tt := range idTests {
+		t.Run(tt.id, func(t *testing.T) {
+			err := LexModeName(tt.id).validate()
+			if tt.invalid {
+				if err == nil {
+					t.Errorf("expected error didn't occur")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("unexpected error occurred: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func TestSnakeCaseToUpperCamelCase(t *testing.T) {
+	tests := []struct {
+		snake string
+		camel string
+	}{
+		{
+			snake: "foo",
+			camel: "Foo",
+		},
+		{
+			snake: "foo_bar",
+			camel: "FooBar",
+		},
+		{
+			snake: "foo_bar_baz",
+			camel: "FooBarBaz",
+		},
+		{
+			snake: "Foo",
+			camel: "Foo",
+		},
+		{
+			snake: "fooBar",
+			camel: "FooBar",
+		},
+		{
+			snake: "FOO",
+			camel: "FOO",
+		},
+		{
+			snake: "FOO_BAR",
+			camel: "FOOBAR",
+		},
+		{
+			snake: "_foo_bar_",
+			camel: "FooBar",
+		},
+		{
+			snake: "___foo___bar___",
+			camel: "FooBar",
+		},
+	}
+	for _, tt := range tests {
+		c := SnakeCaseToUpperCamelCase(tt.snake)
+		if c != tt.camel {
+			t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
+		}
+	}
+}
+
+func TestFindSpellingInconsistencies(t *testing.T) {
+	tests := []struct {
+		ids        []string
+		duplicated [][]string
+	}{
+		{
+			ids:        []string{"foo", "foo"},
+			duplicated: nil,
+		},
+		{
+			ids:        []string{"foo", "Foo"},
+			duplicated: [][]string{{"Foo", "foo"}},
+		},
+		{
+			ids:        []string{"foo", "foo", "Foo"},
+			duplicated: [][]string{{"Foo", "foo"}},
+		},
+		{
+			ids: []string{"foo_bar_baz", "FooBarBaz"},
[][]string{{"FooBarBaz", "foo_bar_baz"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + duplicated := FindSpellingInconsistencies(tt.ids) + if len(duplicated) != len(tt.duplicated) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) + } + for i, dupIDs := range duplicated { + if len(dupIDs) != len(tt.duplicated[i]) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + for j, id := range dupIDs { + if id != tt.duplicated[i][j] { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + } + } + }) + } +} + +func TestLexSpec_Validate(t *testing.T) { + // We expect that the spelling inconsistency error will occur. + spec := &LexSpec{ + Entries: []*LexEntry{ + { + Modes: []LexModeName{ + // 'Default' is the spelling inconsistency because 'default' is predefined. + "Default", + }, + Kind: "foo", + Pattern: "foo", + }, + }, + } + err := spec.Validate() + if err == nil { + t.Fatalf("expected error didn't occur") + } +} |