diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-24 01:17:27 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-09-24 13:40:59 +0900 |
commit | a8ed73f786fa9dd28965e4bf915022eb4a90bbba (patch) | |
tree | b760670fb2eda9a06acafbab3e7df56e4f4d7bf4 | |
parent | Remove --debug option from compile command (diff) | |
download | tre-a8ed73f786fa9dd28965e4bf915022eb4a90bbba.tar.gz tre-a8ed73f786fa9dd28965e4bf915022eb4a90bbba.tar.xz |
Disallow upper cases in an identifier
Diffstat (limited to '')
-rw-r--r-- | README.md | 37 | ||||
-rw-r--r-- | driver/lexer_test.go | 168 | ||||
-rw-r--r-- | spec/spec.go | 2 | ||||
-rw-r--r-- | spec/spec_test.go | 8 |
4 files changed, 116 insertions, 99 deletions
@@ -165,25 +165,34 @@ The lexical specification format to be passed to `maleeni compile` command is as top level object: -| Field | Type | Nullable | Description | -|---------|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------| -| name | string | false | A specification name. | -| entries | array of entry objects | false | An array of entries sorted by priority. The first element has the highest priority, and the last has the lowest priority. | +| Field | Type | Domain | Nullable | Description | +|---------|------------------------|--------|----------|---------------------------------------------------------------------------------------------------------------------------| +| name | string | id | false | A specification name. | +| entries | array of entry objects | N/A | false | An array of entries sorted by priority. The first element has the highest priority, and the last has the lowest priority. | entry object: -| Field | Type | Nullable | Description | -|----------|------------------|----------|-----------------------------------------------------------------------------------------------------------------------| -| kind | string | false | A name of a token kind. The name must be unique, but duplicate names between fragments and non-fragments are allowed. | -| pattern | string | false | A pattern in a regular expression | -| modes | array of strings | true | Mode names that an entry is enabled in (default: "default") | -| push | string | true | A mode name that the lexer pushes to own mode stack when a token matching the pattern appears | -| pop | bool | true | When `pop` is `true`, the lexer pops a mode from own mode stack. | -| fragment | bool | true | When `fragment` is `true`, its entry is a fragment. 
| +| Field | Type | Domain | Nullable | Description | +|----------|------------------|--------|----------|-----------------------------------------------------------------------------------------------------------------------| +| kind | string | id | false | A name of a token kind. The name must be unique, but duplicate names between fragments and non-fragments are allowed. | +| pattern | string | regexp | false | A pattern in a regular expression | +| modes | array of strings | N/A | true | Mode names that an entry is enabled in (default: "default") | +| push | string | id | true | A mode name that the lexer pushes to own mode stack when a token matching the pattern appears | +| pop | bool | N/A | true | When `pop` is `true`, the lexer pops a mode from own mode stack. | +| fragment | bool | N/A | true | When `fragment` is `true`, its entry is a fragment. | -See [Regular Expression Syntax](#regular-expression-syntax) for more details on the regular expression syntax. +See [Identifier](#identifier) and [Regular Expression](#regular-expression) for more details on `id` domain and `regexp` domain. -## Regular Expression Syntax +## Identifier + +`id` represents an identifier and must follow the rules below: + +* `id` must be in lower snake case. It can contain only `a` to `z`, `0` to `9`, and `_`. +* The first character must be one of `a` to `z`. The last character must be one of `a` to `z` or `0` to `9`, and `_` must not appear consecutively. + +## Regular Expression + +`regexp` represents a regular expression. Its syntax is below: ⚠️ In JSON, you need to write `\` as `\\`. diff --git a/driver/lexer_test.go b/driver/lexer_test.go index 8af3817..549800a 100644 --- a/driver/lexer_test.go +++ b/driver/lexer_test.go @@ -232,7 +232,7 @@ func TestLexer_Next(t *testing.T) { // maleeni cannot handle the null character in patterns because compiler.lexer, // specifically read() and restore(), recognizes the null characters as that a symbol doesn't exist. // If a pattern needs a null character, use code point expression \u{0000}. 
- newLexEntryDefaultNOP("char1Byte", "[\x01-\x7f]"), + newLexEntryDefaultNOP("char_1_byte", "[\x01-\x7f]"), }, }, src: string([]byte{ @@ -242,10 +242,10 @@ func TestLexer_Next(t *testing.T) { 0x7f, }), tokens: []*Token{ - newTokenDefault(1, 1, "char1Byte", []byte{0x01}), - newTokenDefault(1, 1, "char1Byte", []byte{0x02}), - newTokenDefault(1, 1, "char1Byte", []byte{0x7e}), - newTokenDefault(1, 1, "char1Byte", []byte{0x7f}), + newTokenDefault(1, 1, "char_1_byte", []byte{0x01}), + newTokenDefault(1, 1, "char_1_byte", []byte{0x02}), + newTokenDefault(1, 1, "char_1_byte", []byte{0x7e}), + newTokenDefault(1, 1, "char_1_byte", []byte{0x7f}), newEOFTokenDefault(), }, }, @@ -254,7 +254,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // all 2 byte characters - newLexEntryDefaultNOP("char2Byte", "[\xc2\x80-\xdf\xbf]"), + newLexEntryDefaultNOP("char_2_byte", "[\xc2\x80-\xdf\xbf]"), }, }, src: string([]byte{ @@ -264,10 +264,10 @@ func TestLexer_Next(t *testing.T) { 0xdf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x80}), - newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x81}), - newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbe}), - newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbf}), + newTokenDefault(1, 1, "char_2_byte", []byte{0xc2, 0x80}), + newTokenDefault(1, 1, "char_2_byte", []byte{0xc2, 0x81}), + newTokenDefault(1, 1, "char_2_byte", []byte{0xdf, 0xbe}), + newTokenDefault(1, 1, "char_2_byte", []byte{0xdf, 0xbf}), newEOFTokenDefault(), }, }, @@ -276,14 +276,14 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // All bytes are the same. 
- newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"), + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"), }, }, src: string([]byte{ 0xe0, 0xa0, 0x80, }), tokens: []*Token{ - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}), newEOFTokenDefault(), }, }, @@ -292,7 +292,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // The first two bytes are the same. - newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"), + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"), }, }, src: string([]byte{ @@ -302,10 +302,10 @@ func TestLexer_Next(t *testing.T) { 0xe0, 0xa0, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbe}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbf}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0xbe}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0xbf}), newEOFTokenDefault(), }, }, @@ -314,7 +314,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // The first byte are the same. 
- newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"), }, }, src: string([]byte{ @@ -324,10 +324,10 @@ func TestLexer_Next(t *testing.T) { 0xe0, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -336,7 +336,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // all 3 byte characters - newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"), + newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"), }, }, src: string([]byte{ @@ -358,22 +358,22 @@ func TestLexer_Next(t *testing.T) { 0xef, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x80}), - newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x81}), - newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbf}), - newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x80}), - newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x81}), - newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbe}), - newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbf}), - newTokenDefault(1, 1, 
"char3Byte", []byte{0xee, 0x80, 0x80}), - newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x81}), - newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x81}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe1, 0x80, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xe1, 0x80, 0x81}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xec, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xec, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x80, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x80, 0x81}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x9f, 0xbe}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x9f, 0xbf}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xee, 0x80, 0x80}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xee, 0x80, 0x81}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xef, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_3_byte", []byte{0xef, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -382,14 +382,14 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // All bytes are the same. - newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"), }, }, src: string([]byte{ 0xf0, 0x90, 0x80, 0x80, }), tokens: []*Token{ - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}), newEOFTokenDefault(), }, }, @@ -398,7 +398,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // The first 3 bytes are the same. 
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"), }, }, src: string([]byte{ @@ -408,10 +408,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0x90, 0x80, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbe}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbf}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0xbe}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0xbf}), newEOFTokenDefault(), }, }, @@ -420,7 +420,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // The first 2 bytes are the same. 
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"), }, }, src: string([]byte{ @@ -430,10 +430,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0x90, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -442,7 +442,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // The first byte are the same. 
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"), }, }, src: string([]byte{ @@ -452,10 +452,10 @@ func TestLexer_Next(t *testing.T) { 0xf0, 0xbf, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -464,7 +464,7 @@ func TestLexer_Next(t *testing.T) { Name: "test", Entries: []*spec.LexEntry{ // all 4 byte characters - newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), + newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"), }, }, src: string([]byte{ @@ -482,18 +482,18 @@ func TestLexer_Next(t *testing.T) { 0xf4, 0x8f, 0xbf, 0xbf, }), tokens: []*Token{ - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x80}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x81}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbf}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x80}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x81}), - 
newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbe}), - newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf1, 0x80, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf1, 0x80, 0x80, 0x81}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf3, 0xbf, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf3, 0xbf, 0xbf, 0xbf}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x80, 0x80, 0x80}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x80, 0x80, 0x81}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x8f, 0xbf, 0xbe}), + newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x8f, 0xbf, 0xbf}), newEOFTokenDefault(), }, }, @@ -501,12 +501,12 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Name: "test", Entries: []*spec.LexEntry{ - newLexEntryDefaultNOP("NonNumber", "[^0-9]+[0-9]"), + newLexEntryDefaultNOP("non_number", "[^0-9]+[0-9]"), }, }, src: "foo9", tokens: []*Token{ - newTokenDefault(1, 1, "NonNumber", []byte("foo9")), + newTokenDefault(1, 1, "non_number", []byte("foo9")), newEOFTokenDefault(), }, }, @@ -514,18 +514,18 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Name: "test", Entries: []*spec.LexEntry{ - newLexEntryDefaultNOP("char1Byte", "\\u{006E}"), - newLexEntryDefaultNOP("char2Byte", "\\u{03BD}"), - newLexEntryDefaultNOP("char3Byte", "\\u{306B}"), - newLexEntryDefaultNOP("char4Byte", "\\u{01F638}"), + newLexEntryDefaultNOP("char_1_byte", "\\u{006E}"), + newLexEntryDefaultNOP("char_2_byte", "\\u{03BD}"), + newLexEntryDefaultNOP("char_3_byte", "\\u{306B}"), + newLexEntryDefaultNOP("char_4_byte", "\\u{01F638}"), }, }, src: "nνに😸", 
tokens: []*Token{ - newTokenDefault(1, 1, "char1Byte", []byte{0x6E}), - newTokenDefault(2, 2, "char2Byte", []byte{0xCE, 0xBD}), - newTokenDefault(3, 3, "char3Byte", []byte{0xE3, 0x81, 0xAB}), - newTokenDefault(4, 4, "char4Byte", []byte{0xF0, 0x9F, 0x98, 0xB8}), + newTokenDefault(1, 1, "char_1_byte", []byte{0x6E}), + newTokenDefault(2, 2, "char_2_byte", []byte{0xCE, 0xBD}), + newTokenDefault(3, 3, "char_3_byte", []byte{0xE3, 0x81, 0xAB}), + newTokenDefault(4, 4, "char_4_byte", []byte{0xF0, 0x9F, 0x98, 0xB8}), newEOFTokenDefault(), }, }, @@ -533,15 +533,15 @@ func TestLexer_Next(t *testing.T) { lspec: &spec.LexSpec{ Name: "test", Entries: []*spec.LexEntry{ - newLexEntryDefaultNOP("codePointsAlt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"), + newLexEntryDefaultNOP("code_points_alt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"), }, }, src: "nνに😸", tokens: []*Token{ - newTokenDefault(1, 1, "codePointsAlt", []byte{0x6E}), - newTokenDefault(1, 1, "codePointsAlt", []byte{0xCE, 0xBD}), - newTokenDefault(1, 1, "codePointsAlt", []byte{0xE3, 0x81, 0xAB}), - newTokenDefault(1, 1, "codePointsAlt", []byte{0xF0, 0x9F, 0x98, 0xB8}), + newTokenDefault(1, 1, "code_points_alt", []byte{0x6E}), + newTokenDefault(1, 1, "code_points_alt", []byte{0xCE, 0xBD}), + newTokenDefault(1, 1, "code_points_alt", []byte{0xE3, 0x81, 0xAB}), + newTokenDefault(1, 1, "code_points_alt", []byte{0xF0, 0x9F, 0x98, 0xB8}), newEOFTokenDefault(), }, }, diff --git a/spec/spec.go b/spec/spec.go index 2360201..3d46269 100644 --- a/spec/spec.go +++ b/spec/spec.go @@ -101,7 +101,7 @@ func (m LexModeName) validate() error { return nil } -const idPattern = `^[A-Za-z](_?[0-9A-Za-z]+)*$` +const idPattern = `^[a-z](_?[0-9a-z]+)*$` var idRE = regexp.MustCompile(idPattern) diff --git a/spec/spec_test.go b/spec/spec_test.go index 54d7b7d..e0e920e 100644 --- a/spec/spec_test.go +++ b/spec/spec_test.go @@ -22,6 +22,14 @@ var idTests = []struct { id: "f_o_o", }, { + id: "Foo", + invalid: true, + }, + { + id: "foo_Bar", + 
invalid: true, + }, + { id: "2foo", invalid: true, }, |