author     Ryo Nihei <nihei.dev@gmail.com>    2021-09-18 17:07:09 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>    2021-09-18 17:07:09 +0900
commit     fe865a812401c2c612f2cd17cedd4728dc4798f7
tree       007dbc653364809e2273ba95aef0f1bea2006127
parent     Update CHANGELOG
download   tre-fe865a812401c2c612f2cd17cedd4728dc4798f7.tar.gz, tre-fe865a812401c2c612f2cd17cedd4728dc4798f7.tar.xz
Generate constant values representing mode IDs, mode names, kind IDs, and kind names
-rw-r--r--  driver/lexer_test.go | 154
-rw-r--r--  driver/template.go   | 102
-rw-r--r--  spec/spec.go         | 154
-rw-r--r--  spec/spec_test.go    | 211
4 files changed, 515 insertions(+), 106 deletions(-)
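The gist of the change: the generated spec source previously referred to modes and kinds only by bare numeric IDs. After this commit, maleeni-go also emits const blocks naming every mode ID, mode name, kind ID, and kind name, with the Go identifiers derived from the spec's snake_case names via SnakeCaseToUpperCamelCase. As an illustrative sketch only (the mode and kind names below are hypothetical, not taken from this commit), a generated file would gain blocks along these lines:

	const (
		ModeIDNil     ModeID = 0
		ModeIDDefault ModeID = 1
	)

	const (
		ModeNameNil     = ""
		ModeNameDefault = "default"
	)

	const (
		KindIDNil        KindID = 0
		KindIDWord       KindID = 1
		KindIDWhiteSpace KindID = 2
	)

	const (
		KindNameNil        = ""
		KindNameWord       = "word"
		KindNameWhiteSpace = "white_space"
	)

This is presumably also why the test data below renames kinds like 1ByteChar to char1Byte: the new identifier pattern is anchored and rejects names starting with a digit, whereas the old pattern was matched unanchored, so 1ByteChar used to slip through.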
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index ebb4aad..a742bad 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -227,7 +227,7 @@ func TestLexer_Next(t *testing.T) {
 					// maleeni cannot handle the null character in patterns because compiler.lexer,
 					// specifically read() and restore(), treats the null character as indicating that a symbol doesn't exist.
 					// If a pattern needs a null character, use the code point expression \u{0000}.
-					newLexEntryDefaultNOP("1ByteChar", "[\x01-\x7f]"),
+					newLexEntryDefaultNOP("char1Byte", "[\x01-\x7f]"),
 				},
 			},
 			src: string([]byte{
@@ -237,10 +237,10 @@ func TestLexer_Next(t *testing.T) {
 				0x7f,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x01}),
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x02}),
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x7e}),
-				newTokenDefault(1, 1, "1ByteChar", []byte{0x7f}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x01}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x02}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x7e}),
+				newTokenDefault(1, 1, "char1Byte", []byte{0x7f}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -248,7 +248,7 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// all 2 byte characters
-					newLexEntryDefaultNOP("2ByteChar", "[\xc2\x80-\xdf\xbf]"),
+					newLexEntryDefaultNOP("char2Byte", "[\xc2\x80-\xdf\xbf]"),
 				},
 			},
 			src: string([]byte{
@@ -258,10 +258,10 @@ func TestLexer_Next(t *testing.T) {
 				0xdf, 0xbf,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x80}),
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x81}),
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbe}),
-				newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbf}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x80}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x81}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbe}),
+				newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbf}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -269,14 +269,14 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// All bytes are the same.
-					newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+					newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
 				},
 			},
 			src: string([]byte{
 				0xe0, 0xa0, 0x80,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -284,7 +284,7 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// The first two bytes are the same.
-					newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+					newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
 				},
 			},
 			src: string([]byte{
@@ -294,10 +294,10 @@ func TestLexer_Next(t *testing.T) {
 				0xe0, 0xa0, 0xbf,
 			}),
 			tokens: []*Token{
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
-				newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbe}),
+				newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbf}),
 				newEOFTokenDefault(),
 			},
 		},
@@ -305,7 +305,7 @@ func TestLexer_Next(t *testing.T) {
 			lspec: &spec.LexSpec{
 				Entries: []*spec.LexEntry{
 					// The first byte is the same.
- newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), + newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), }, }, src: string([]byte{ @@ -315,10 +315,10 @@ func TestLexer_Next(t *testing.T) { 0xe0, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -326,7 +326,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 3 byte characters - newLexEntryDefaultNOP("3ByteChar", "[\xe0\xa0\x80-\xef\xbf\xbf]"), + newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"), }, }, src: string([]byte{ @@ -348,22 +348,22 @@ func TestLexer_Next(t *testing.T) { 0xef, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x80}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x81}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}), - newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbf}), + newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x80}), + newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x81}), + newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -371,14 +371,14 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // All bytes are the same. 
- newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), }, }, src: string([]byte{ 0xf0, 0x90, 0x80, 0x80, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), newEOFTokenDefault(), }, }, @@ -386,7 +386,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first 3 bytes are the same. - newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), }, }, src: string([]byte{ @@ -396,10 +396,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0x90, 0x80, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbf}), newEOFTokenDefault(), }, }, @@ -407,7 +407,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first 2 bytes are the same. - newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), }, }, src: string([]byte{ @@ -417,10 +417,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0x90, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -428,7 +428,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // The first byte are the same. 
- newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), }, }, src: string([]byte{ @@ -438,10 +438,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0xbf, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -449,7 +449,7 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ // all 4 byte characters - newLexEntryDefaultNOP("4ByteChar", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), + newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), }, }, src: string([]byte{ @@ -467,18 +467,18 @@ func TestLexer_Next(t *testing.T) { 0xf4, 0x8f, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}), - newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x80}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x81}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -497,18 +497,18 @@ func TestLexer_Next(t *testing.T) { { lspec: &spec.LexSpec{ Entries: []*spec.LexEntry{ - newLexEntryDefaultNOP("1ByteChar", "\\u{006E}"), - newLexEntryDefaultNOP("2ByteChar", "\\u{03BD}"), - newLexEntryDefaultNOP("3ByteChar", "\\u{306B}"), - newLexEntryDefaultNOP("4ByteChar", "\\u{01F638}"), + newLexEntryDefaultNOP("char1Byte", "\\u{006E}"), + newLexEntryDefaultNOP("char2Byte", "\\u{03BD}"), + newLexEntryDefaultNOP("char3Byte", "\\u{306B}"), + newLexEntryDefaultNOP("char4Byte", "\\u{01F638}"), }, }, src: "nνに😸", tokens: []*Token{ - 
newTokenDefault(1, 1, "1ByteChar", []byte{0x6E}), - newTokenDefault(2, 2, "2ByteChar", []byte{0xCE, 0xBD}), - newTokenDefault(3, 3, "3ByteChar", []byte{0xE3, 0x81, 0xAB}), - newTokenDefault(4, 4, "4ByteChar", []byte{0xF0, 0x9F, 0x98, 0xB8}), + newTokenDefault(1, 1, "char1Byte", []byte{0x6E}), + newTokenDefault(2, 2, "char2Byte", []byte{0xCE, 0xBD}), + newTokenDefault(3, 3, "char3Byte", []byte{0xE3, 0x81, 0xAB}), + newTokenDefault(4, 4, "char4Byte", []byte{0xF0, 0x9F, 0x98, 0xB8}), newEOFTokenDefault(), }, }, diff --git a/driver/template.go b/driver/template.go index f7caa75..2772135 100644 --- a/driver/template.go +++ b/driver/template.go @@ -35,6 +35,67 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { lexerSrc = b.String() } + var modeIDsSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + for i, k := range clspec.ModeNames { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, " ModeIDNil ModeID = %v\n", i) + continue + } + fmt.Fprintf(&b, " ModeID%v ModeID = %v\n", spec.SnakeCaseToUpperCamelCase(k.String()), i) + } + fmt.Fprintf(&b, ")") + + modeIDsSrc = b.String() + } + + var modeNamesSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + for i, k := range clspec.ModeNames { + if i == spec.LexModeIDNil.Int() { + fmt.Fprintf(&b, " ModeNameNil = %#v\n", "") + continue + } + fmt.Fprintf(&b, " ModeName%v = %#v\n", spec.SnakeCaseToUpperCamelCase(k.String()), k) + } + fmt.Fprintf(&b, ")") + + modeNamesSrc = b.String() + } + + var kindIDsSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + for i, k := range clspec.KindNames { + if i == spec.LexKindIDNil.Int() { + fmt.Fprintf(&b, " KindIDNil KindID = %v\n", i) + continue + } + fmt.Fprintf(&b, " KindID%v KindID = %v\n", spec.SnakeCaseToUpperCamelCase(k.String()), i) + } + fmt.Fprintf(&b, ")") + + kindIDsSrc = b.String() + } + + var kindNamesSrc string + { + var b strings.Builder + fmt.Fprintf(&b, "const (\n") + fmt.Fprintf(&b, " KindNameNil = %#v\n", "") + for _, k := range clspec.KindNames[1:] { + fmt.Fprintf(&b, " KindName%v = %#v\n", spec.SnakeCaseToUpperCamelCase(k.String()), k) + } + fmt.Fprintf(&b, ")") + + kindNamesSrc = b.String() + } + var specSrc string { t, err := template.New("").Funcs(genTemplateFuncs(clspec)).Parse(lexSpecTemplate) @@ -44,8 +105,8 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { var b strings.Builder err = t.Execute(&b, map[string]interface{}{ - "initialModeID": clspec.InitialModeID, - "modeIDNil": spec.LexModeIDNil, + "initialModeID": "ModeID" + spec.SnakeCaseToUpperCamelCase(clspec.ModeNames[clspec.InitialModeID].String()), + "modeIDNil": "ModeIDNil", "modeKindIDNil": spec.LexModeKindIDNil, "stateIDNil": spec.StateIDNil, "compressionLevel": clspec.CompressionLevel, @@ -62,6 +123,14 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error { tmpl := `// Code generated by maleeni-go. DO NOT EDIT. 
 
 {{ .lexerSrc }}
 
+{{ .modeIDsSrc }}
+
+{{ .modeNamesSrc }}
+
+{{ .kindIDsSrc }}
+
+{{ .kindNamesSrc }}
+
 {{ .specSrc }}
 `
@@ -72,8 +141,12 @@ func GenLexer(clspec *spec.CompiledLexSpec, pkgName string) error {
 
 	var b strings.Builder
 	err = t.Execute(&b, map[string]string{
-		"lexerSrc": lexerSrc,
-		"specSrc":  specSrc,
+		"lexerSrc":     lexerSrc,
+		"modeIDsSrc":   modeIDsSrc,
+		"modeNamesSrc": modeNamesSrc,
+		"kindIDsSrc":   kindIDsSrc,
+		"kindNamesSrc": kindNamesSrc,
+		"specSrc":      specSrc,
 	})
 	if err != nil {
 		return err
@@ -239,11 +312,10 @@ func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap {
 			fmt.Fprintf(&b, "[]string{\n")
 			for i, name := range clspec.ModeNames {
 				if i == spec.LexModeIDNil.Int() {
-					fmt.Fprintf(&b, "%#v,\n", "")
+					fmt.Fprintf(&b, "ModeNameNil,\n")
 					continue
 				}
-
-				fmt.Fprintf(&b, "%#v,\n", name)
+				fmt.Fprintf(&b, "ModeName%v,\n", spec.SnakeCaseToUpperCamelCase(name.String()))
 			}
 			fmt.Fprintf(&b, "}")
 			return b.String()
@@ -290,10 +362,13 @@ func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap {
 					continue
 				}
 
-				fmt.Fprintf(&b, "{")
-				fmt.Fprintf(&b, "%v", ids[0])
-				for _, v := range ids[1:] {
-					fmt.Fprintf(&b, ", %v", v)
+				fmt.Fprintf(&b, "{\n")
+				for j, id := range ids {
+					if j == spec.LexModeKindIDNil.Int() {
+						fmt.Fprintf(&b, "KindIDNil,\n")
+						continue
+					}
+					fmt.Fprintf(&b, "KindID%v,\n", spec.SnakeCaseToUpperCamelCase(string(clspec.KindNames[id].String())))
 				}
 				fmt.Fprintf(&b, "},\n")
 			}
@@ -305,11 +380,10 @@ func genTemplateFuncs(clspec *spec.CompiledLexSpec) template.FuncMap {
 			fmt.Fprintf(&b, "[]string{\n")
 			for i, name := range clspec.KindNames {
 				if i == spec.LexKindIDNil.Int() {
-					fmt.Fprintf(&b, "%#v,\n", "")
+					fmt.Fprintf(&b, "KindNameNil,\n")
 					continue
 				}
-
-				fmt.Fprintf(&b, "%#v,\n", name)
+				fmt.Fprintf(&b, "KindName%v,\n", spec.SnakeCaseToUpperCamelCase(name.String()))
 			}
 			fmt.Fprintf(&b, "}")
 			return b.String()
diff --git a/spec/spec.go b/spec/spec.go
index 9ac5f4b..62acfc4 100644
--- a/spec/spec.go
+++ b/spec/spec.go
@@ -3,6 +3,7 @@ package spec
 import (
 	"fmt"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 )
@@ -42,19 +43,13 @@ func (k LexKindName) String() string {
 }
 
 func (k LexKindName) validate() error {
-	if k == "" {
-		return fmt.Errorf("kind doesn't allow to be the empty string")
-	}
-	if !lexKindNameRE.Match([]byte(k)) {
-		return fmt.Errorf("kind must be %v", lexKindNamePattern)
+	err := validateIdentifier(k.String())
+	if err != nil {
+		return fmt.Errorf("invalid kind name: %v", err)
 	}
 	return nil
 }
 
-const lexKindNamePattern = "[A-Za-z_][0-9A-Za-z_]*"
-
-var lexKindNameRE = regexp.MustCompile(lexKindNamePattern)
-
 // LexPattern represents a pattern of a lexeme.
 // The pattern is written in regular expression.
 type LexPattern string
@@ -99,19 +94,38 @@ func (m LexModeName) String() string {
 }
 
 func (m LexModeName) validate() error {
-	if m.isNil() || !lexModeNameRE.Match([]byte(m)) {
-		return fmt.Errorf("mode must be %v", lexModeNamePattern)
+	err := validateIdentifier(m.String())
+	if err != nil {
+		return fmt.Errorf("invalid mode name: %v", err)
 	}
 	return nil
 }
 
-func (m LexModeName) isNil() bool {
-	return m == LexModeNameNil
+const idPattern = `^[A-Za-z](_?[0-9A-Za-z]+)*$`
+
+var idRE = regexp.MustCompile(idPattern)
+
+func validateIdentifier(id string) error {
+	if id == "" {
+		return fmt.Errorf("identifier doesn't allow to be the empty string")
+	}
+	if !idRE.MatchString(id) {
+		return fmt.Errorf("identifier must be %v", idPattern)
+	}
+	return nil
 }
 
-const lexModeNamePattern = "[A-Za-z_][0-9A-Za-z_]*"
+func SnakeCaseToUpperCamelCase(snake string) string {
+	elems := strings.Split(snake, "_")
+	for i, e := range elems {
+		if len(e) == 0 {
+			continue
+		}
+		elems[i] = strings.ToUpper(string(e[0])) + e[1:]
+	}
 
-var lexModeNameRE = regexp.MustCompile(lexModeNamePattern)
+	return strings.Join(elems, "")
+}
 
 type LexEntry struct {
 	Kind    LexKindName `json:"kind"`
@@ -185,9 +199,119 @@ func (s *LexSpec) Validate() error {
 			}
 		}
 	}
+	{
+		kinds := []string{}
+		modes := []string{
+			LexModeNameDefault.String(), // This is a predefined mode.
+		}
+		for _, e := range s.Entries {
+			if e.Fragment {
+				continue
+			}
+
+			kinds = append(kinds, e.Kind.String())
+
+			for _, m := range e.Modes {
+				modes = append(modes, m.String())
+			}
+		}
+
+		kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
+		modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
+			if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) {
+				var b strings.Builder
+				fmt.Fprintf(&b, "%+v", ids[0])
+				for _, id := range ids[1:] {
+					fmt.Fprintf(&b, ", %+v", id)
+				}
+				return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String())
+			}
+			return nil
+		})
+		errs := append(kindErrs, modeErrs...)
+		if len(errs) > 0 {
+			var b strings.Builder
+			fmt.Fprintf(&b, "%v", errs[0])
+			for _, err := range errs[1:] {
+				fmt.Fprintf(&b, "\n%v", err)
+			}
+			return fmt.Errorf(b.String())
+		}
+	}
+
 	return nil
 }
 
+func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
+	duplicated := FindSpellingInconsistencies(ids)
+	if len(duplicated) == 0 {
+		return nil
+	}
+
+	var errs []error
+	for _, dup := range duplicated {
+		err := hook(dup)
+		if err != nil {
+			errs = append(errs, err)
+			continue
+		}
+
+		var b strings.Builder
+		fmt.Fprintf(&b, "%+v", dup[0])
+		for _, id := range dup[1:] {
+			fmt.Fprintf(&b, ", %+v", id)
+		}
+		err = fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
+		errs = append(errs, err)
+	}
+
+	return errs
+}
+
+// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
+// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
+// in UpperCamelCase. Thus they are considered to be a spelling inconsistency.
+func FindSpellingInconsistencies(ids []string) [][]string {
+	m := map[string][]string{}
+	for _, id := range removeDuplicates(ids) {
+		c := SnakeCaseToUpperCamelCase(id)
+		m[c] = append(m[c], id)
+	}
+
+	var duplicated [][]string
+	for _, camels := range m {
+		if len(camels) == 1 {
+			continue
+		}
+		duplicated = append(duplicated, camels)
+	}
+
+	for _, dup := range duplicated {
+		sort.Slice(dup, func(i, j int) bool {
+			return dup[i] < dup[j]
+		})
+	}
+	sort.Slice(duplicated, func(i, j int) bool {
+		return duplicated[i][0] < duplicated[j][0]
+	})
+
+	return duplicated
+}
+
+func removeDuplicates(s []string) []string {
+	m := map[string]struct{}{}
+	for _, v := range s {
+		m[v] = struct{}{}
+	}
+
+	var unique []string
+	for v := range m {
+		unique = append(unique, v)
+	}
+
+	return unique
+}
+
 // StateID represents an ID of a state of a transition table.
 type StateID int
diff --git a/spec/spec_test.go b/spec/spec_test.go
new file mode 100644
index 0000000..54d7b7d
--- /dev/null
+++ b/spec/spec_test.go
@@ -0,0 +1,211 @@
+package spec
+
+import (
+	"fmt"
+	"testing"
+)
+
+var idTests = []struct {
+	id      string
+	invalid bool
+}{
+	{
+		id: "foo",
+	},
+	{
+		id: "foo2",
+	},
+	{
+		id: "foo_bar_baz",
+	},
+	{
+		id: "f_o_o",
+	},
+	{
+		id:      "2foo",
+		invalid: true,
+	},
+	{
+		id:      "_foo",
+		invalid: true,
+	},
+	{
+		id:      "foo_",
+		invalid: true,
+	},
+	{
+		id:      "foo__bar",
+		invalid: true,
+	},
+}
+
+func TestValidateIdentifier(t *testing.T) {
+	for _, tt := range idTests {
+		t.Run(tt.id, func(t *testing.T) {
+			err := validateIdentifier(tt.id)
+			if tt.invalid {
+				if err == nil {
+					t.Errorf("expected error didn't occur")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("unexpected error occurred: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func TestLexKindName_validate(t *testing.T) {
+	for _, tt := range idTests {
+		t.Run(tt.id, func(t *testing.T) {
+			err := LexKindName(tt.id).validate()
+			if tt.invalid {
+				if err == nil {
+					t.Errorf("expected error didn't occur")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("unexpected error occurred: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func TestLexModeName_validate(t *testing.T) {
+	for _, tt := range idTests {
+		t.Run(tt.id, func(t *testing.T) {
+			err := LexModeName(tt.id).validate()
+			if tt.invalid {
+				if err == nil {
+					t.Errorf("expected error didn't occur")
+				}
+			} else {
+				if err != nil {
+					t.Errorf("unexpected error occurred: %v", err)
+				}
+			}
+		})
+	}
+}
+
+func TestSnakeCaseToUpperCamelCase(t *testing.T) {
+	tests := []struct {
+		snake string
+		camel string
+	}{
+		{
+			snake: "foo",
+			camel: "Foo",
+		},
+		{
+			snake: "foo_bar",
+			camel: "FooBar",
+		},
+		{
+			snake: "foo_bar_baz",
+			camel: "FooBarBaz",
+		},
+		{
+			snake: "Foo",
+			camel: "Foo",
+		},
+		{
+			snake: "fooBar",
+			camel: "FooBar",
+		},
+		{
+			snake: "FOO",
+			camel: "FOO",
+		},
+		{
+			snake: "FOO_BAR",
+			camel: "FOOBAR",
+		},
+		{
+			snake: "_foo_bar_",
+			camel: "FooBar",
+		},
+		{
+			snake: "___foo___bar___",
+			camel: "FooBar",
+		},
+	}
+	for _, tt := range tests {
+		c := SnakeCaseToUpperCamelCase(tt.snake)
+		if c != tt.camel {
+			t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
+		}
+	}
+}
+
+func TestFindSpellingInconsistencies(t *testing.T) {
+	tests := []struct {
+		ids        []string
+		duplicated [][]string
+	}{
+		{
+			ids:        []string{"foo", "foo"},
+			duplicated: nil,
+		},
+		{
+			ids:        []string{"foo", "Foo"},
+			duplicated: [][]string{{"Foo", "foo"}},
+		},
+		{
+			ids:        []string{"foo", "foo", "Foo"},
+			duplicated: [][]string{{"Foo", "foo"}},
+		},
+		{
+			ids: []string{"foo_bar_baz", "FooBarBaz"},
[][]string{{"FooBarBaz", "foo_bar_baz"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + { + ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, + duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, + }, + } + for i, tt := range tests { + t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { + duplicated := FindSpellingInconsistencies(tt.ids) + if len(duplicated) != len(tt.duplicated) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) + } + for i, dupIDs := range duplicated { + if len(dupIDs) != len(tt.duplicated[i]) { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + for j, id := range dupIDs { + if id != tt.duplicated[i][j] { + t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) + } + } + } + }) + } +} + +func TestLexSpec_Validate(t *testing.T) { + // We expect that the spelling inconsistency error will occur. + spec := &LexSpec{ + Entries: []*LexEntry{ + { + Modes: []LexModeName{ + // 'Default' is the spelling inconsistency because 'default' is predefined. + "Default", + }, + Kind: "foo", + Pattern: "foo", + }, + }, + } + err := spec.Validate() + if err == nil { + t.Fatalf("expected error didn't occur") + } +} |