author    Ryo Nihei <nihei.dev@gmail.com>  2021-09-24 01:17:27 +0900
committer Ryo Nihei <nihei.dev@gmail.com>  2021-09-24 13:40:59 +0900
commit    a8ed73f786fa9dd28965e4bf915022eb4a90bbba (patch)
tree      b760670fb2eda9a06acafbab3e7df56e4f4d7bf4
parent    Remove --debug option from compile command (diff)
download  tre-a8ed73f786fa9dd28965e4bf915022eb4a90bbba.tar.gz,
          tre-a8ed73f786fa9dd28965e4bf915022eb4a90bbba.tar.xz
Disallow uppercase letters in identifiers
Diffstat
 -rw-r--r--  README.md              37
 -rw-r--r--  driver/lexer_test.go  168
 -rw-r--r--  spec/spec.go            2
 -rw-r--r--  spec/spec_test.go       8
4 files changed, 116 insertions(+), 99 deletions(-)
diff --git a/README.md b/README.md
index e736091..0a05876 100644
--- a/README.md
+++ b/README.md
@@ -165,25 +165,34 @@ The lexical specification format to be passed to `maleeni compile` command is as
top level object:
-| Field | Type | Nullable | Description |
-|---------|------------------------|----------|---------------------------------------------------------------------------------------------------------------------------|
-| name | string | false | A specification name. |
-| entries | array of entry objects | false | An array of entries sorted by priority. The first element has the highest priority, and the last has the lowest priority. |
+| Field | Type | Domain | Nullable | Description |
+|---------|------------------------|--------|----------|---------------------------------------------------------------------------------------------------------------------------|
+| name | string | id | false | A specification name. |
+| entries | array of entry objects | N/A | false | An array of entries sorted by priority. The first element has the highest priority, and the last has the lowest priority. |
entry object:
-| Field | Type | Nullable | Description |
-|----------|------------------|----------|-----------------------------------------------------------------------------------------------------------------------|
-| kind | string | false | A name of a token kind. The name must be unique, but duplicate names between fragments and non-fragments are allowed. |
-| pattern | string | false | A pattern in a regular expression |
-| modes | array of strings | true | Mode names that an entry is enabled in (default: "default") |
-| push | string | true | A mode name that the lexer pushes to own mode stack when a token matching the pattern appears |
-| pop | bool | true | When `pop` is `true`, the lexer pops a mode from own mode stack. |
-| fragment | bool | true | When `fragment` is `true`, its entry is a fragment. |
+| Field | Type | Domain | Nullable | Description |
+|----------|------------------|--------|----------|-----------------------------------------------------------------------------------------------------------------------|
+| kind | string | id | false | The name of a token kind. The name must be unique, although a fragment and a non-fragment may share the same name. |
+| pattern | string | regexp | false | A pattern written as a regular expression. |
+| modes | array of strings | N/A | true | Mode names in which the entry is enabled (default: "default"). |
+| push | string | id | true | A mode name that the lexer pushes onto its own mode stack when a token matching the pattern appears. |
+| pop | bool | N/A | true | When `pop` is `true`, the lexer pops a mode from its own mode stack. |
+| fragment | bool | N/A | true | When `fragment` is `true`, the entry is a fragment. |
-See [Regular Expression Syntax](#regular-expression-syntax) for more details on the regular expression syntax.
+See [Identifier](#identifier) and [Regular Expression](#regular-expression) for more details on the `id` and `regexp` domains.
-## Regular Expression Syntax
+## Identifier
+
+`id` represents an identifier and must follow the rules below:
+
+* `id` must be in lower snake case. It can contain only `a` to `z`, `0` to `9`, and `_`.
+* The first character must be one of `a` to `z`, and each `_` must sit between alphanumeric characters (no leading, trailing, or consecutive underscores).
+
+## Regular Expression
+
+`regexp` represents a regular expression. Its syntax is as follows:
⚠️ In JSON, you need to write `\` as `\\`.
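For reference, a complete spec conforming to the two tables above might look like the following. The name, kinds, and patterns here are illustrative only and are not part of this change; note the lower-snake-case identifiers and the doubled backslash required by JSON:

```json
{
    "name": "example",
    "entries": [
        { "kind": "white_space", "pattern": "[\\u{0009}\\u{0020}]+" },
        { "kind": "word", "pattern": "[0-9a-z_]+" }
    ]
}
```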
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 8af3817..549800a 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -232,7 +232,7 @@ func TestLexer_Next(t *testing.T) {
// maleeni cannot handle the null character in patterns because compiler.lexer,
// specifically read() and restore(), treats a null character as the absence of a symbol.
// If a pattern needs a null character, use code point expression \u{0000}.
- newLexEntryDefaultNOP("char1Byte", "[\x01-\x7f]"),
+ newLexEntryDefaultNOP("char_1_byte", "[\x01-\x7f]"),
},
},
src: string([]byte{
@@ -242,10 +242,10 @@ func TestLexer_Next(t *testing.T) {
0x7f,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char1Byte", []byte{0x01}),
- newTokenDefault(1, 1, "char1Byte", []byte{0x02}),
- newTokenDefault(1, 1, "char1Byte", []byte{0x7e}),
- newTokenDefault(1, 1, "char1Byte", []byte{0x7f}),
+ newTokenDefault(1, 1, "char_1_byte", []byte{0x01}),
+ newTokenDefault(1, 1, "char_1_byte", []byte{0x02}),
+ newTokenDefault(1, 1, "char_1_byte", []byte{0x7e}),
+ newTokenDefault(1, 1, "char_1_byte", []byte{0x7f}),
newEOFTokenDefault(),
},
},
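A side note on the comment above: a pattern that needs to match a null character must spell it as the code point expression `\u{0000}` rather than as a raw byte. In a JSON spec, such a hypothetical entry would look like this (the doubled backslash is the JSON escaping noted in the README):

```json
{ "kind": "nul_char", "pattern": "\\u{0000}" }
```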
@@ -254,7 +254,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// all 2 byte characters
- newLexEntryDefaultNOP("char2Byte", "[\xc2\x80-\xdf\xbf]"),
+ newLexEntryDefaultNOP("char_2_byte", "[\xc2\x80-\xdf\xbf]"),
},
},
src: string([]byte{
@@ -264,10 +264,10 @@ func TestLexer_Next(t *testing.T) {
0xdf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x80}),
- newTokenDefault(1, 1, "char2Byte", []byte{0xc2, 0x81}),
- newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbe}),
- newTokenDefault(1, 1, "char2Byte", []byte{0xdf, 0xbf}),
+ newTokenDefault(1, 1, "char_2_byte", []byte{0xc2, 0x80}),
+ newTokenDefault(1, 1, "char_2_byte", []byte{0xc2, 0x81}),
+ newTokenDefault(1, 1, "char_2_byte", []byte{0xdf, 0xbe}),
+ newTokenDefault(1, 1, "char_2_byte", []byte{0xdf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -276,14 +276,14 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// All bytes are the same.
- newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\x80]"),
},
},
src: string([]byte{
0xe0, 0xa0, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}),
newEOFTokenDefault(),
},
},
@@ -292,7 +292,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// The first two bytes are the same.
- newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xa0\xbf]"),
},
},
src: string([]byte{
@@ -302,10 +302,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbe}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0xbf}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0xbe}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -314,7 +314,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// The first byte is the same.
- newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xe0\xbf\xbf]"),
},
},
src: string([]byte{
@@ -324,10 +324,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -336,7 +336,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// all 3 byte characters
- newLexEntryDefaultNOP("char3Byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
+ newLexEntryDefaultNOP("char_3_byte", "[\xe0\xa0\x80-\xef\xbf\xbf]"),
},
},
src: string([]byte{
@@ -358,22 +358,22 @@ func TestLexer_Next(t *testing.T) {
0xef, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x80}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xa0, 0x81}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe0, 0xbf, 0xbf}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x80}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xe1, 0x80, 0x81}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xec, 0xbf, 0xbf}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x80}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x80, 0x81}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbe}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xed, 0x9f, 0xbf}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x80}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xee, 0x80, 0x81}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char3Byte", []byte{0xef, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe0, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe1, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xe1, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xec, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xec, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x9f, 0xbe}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xed, 0x9f, 0xbf}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xee, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xee, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xef, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_3_byte", []byte{0xef, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -382,14 +382,14 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// All bytes are the same.
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\x80]"),
},
},
src: string([]byte{
0xf0, 0x90, 0x80, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}),
newEOFTokenDefault(),
},
},
@@ -398,7 +398,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// The first 3 bytes are the same.
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\x80\xbf]"),
},
},
src: string([]byte{
@@ -408,10 +408,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbe}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0xbf}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0xbe}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -420,7 +420,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// The first 2 bytes are the same.
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\x90\xbf\xbf]"),
},
},
src: string([]byte{
@@ -430,10 +430,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -442,7 +442,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// The first byte is the same.
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf0\xbf\xbf\xbf]"),
},
},
src: string([]byte{
@@ -452,10 +452,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0xbf, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -464,7 +464,7 @@ func TestLexer_Next(t *testing.T) {
Name: "test",
Entries: []*spec.LexEntry{
// all 4 byte characters
- newLexEntryDefaultNOP("char4Byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
+ newLexEntryDefaultNOP("char_4_byte", "[\xf0\x90\x80\x80-\xf4\x8f\xbf\xbf]"),
},
},
src: string([]byte{
@@ -482,18 +482,18 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x80}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0x90, 0x80, 0x81}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x80}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf1, 0x80, 0x80, 0x81}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x80}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x80, 0x80, 0x81}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
- newTokenDefault(1, 1, "char4Byte", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf1, 0x80, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x80, 0x80, 0x81}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "char_4_byte", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -501,12 +501,12 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Name: "test",
Entries: []*spec.LexEntry{
- newLexEntryDefaultNOP("NonNumber", "[^0-9]+[0-9]"),
+ newLexEntryDefaultNOP("non_number", "[^0-9]+[0-9]"),
},
},
src: "foo9",
tokens: []*Token{
- newTokenDefault(1, 1, "NonNumber", []byte("foo9")),
+ newTokenDefault(1, 1, "non_number", []byte("foo9")),
newEOFTokenDefault(),
},
},
@@ -514,18 +514,18 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Name: "test",
Entries: []*spec.LexEntry{
- newLexEntryDefaultNOP("char1Byte", "\\u{006E}"),
- newLexEntryDefaultNOP("char2Byte", "\\u{03BD}"),
- newLexEntryDefaultNOP("char3Byte", "\\u{306B}"),
- newLexEntryDefaultNOP("char4Byte", "\\u{01F638}"),
+ newLexEntryDefaultNOP("char_1_byte", "\\u{006E}"),
+ newLexEntryDefaultNOP("char_2_byte", "\\u{03BD}"),
+ newLexEntryDefaultNOP("char_3_byte", "\\u{306B}"),
+ newLexEntryDefaultNOP("char_4_byte", "\\u{01F638}"),
},
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, 1, "char1Byte", []byte{0x6E}),
- newTokenDefault(2, 2, "char2Byte", []byte{0xCE, 0xBD}),
- newTokenDefault(3, 3, "char3Byte", []byte{0xE3, 0x81, 0xAB}),
- newTokenDefault(4, 4, "char4Byte", []byte{0xF0, 0x9F, 0x98, 0xB8}),
+ newTokenDefault(1, 1, "char_1_byte", []byte{0x6E}),
+ newTokenDefault(2, 2, "char_2_byte", []byte{0xCE, 0xBD}),
+ newTokenDefault(3, 3, "char_3_byte", []byte{0xE3, 0x81, 0xAB}),
+ newTokenDefault(4, 4, "char_4_byte", []byte{0xF0, 0x9F, 0x98, 0xB8}),
newEOFTokenDefault(),
},
},
@@ -533,15 +533,15 @@ func TestLexer_Next(t *testing.T) {
lspec: &spec.LexSpec{
Name: "test",
Entries: []*spec.LexEntry{
- newLexEntryDefaultNOP("codePointsAlt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"),
+ newLexEntryDefaultNOP("code_points_alt", "[\\u{006E}\\u{03BD}\\u{306B}\\u{01F638}]"),
},
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, 1, "codePointsAlt", []byte{0x6E}),
- newTokenDefault(1, 1, "codePointsAlt", []byte{0xCE, 0xBD}),
- newTokenDefault(1, 1, "codePointsAlt", []byte{0xE3, 0x81, 0xAB}),
- newTokenDefault(1, 1, "codePointsAlt", []byte{0xF0, 0x9F, 0x98, 0xB8}),
+ newTokenDefault(1, 1, "code_points_alt", []byte{0x6E}),
+ newTokenDefault(1, 1, "code_points_alt", []byte{0xCE, 0xBD}),
+ newTokenDefault(1, 1, "code_points_alt", []byte{0xE3, 0x81, 0xAB}),
+ newTokenDefault(1, 1, "code_points_alt", []byte{0xF0, 0x9F, 0x98, 0xB8}),
newEOFTokenDefault(),
},
},
diff --git a/spec/spec.go b/spec/spec.go
index 2360201..3d46269 100644
--- a/spec/spec.go
+++ b/spec/spec.go
@@ -101,7 +101,7 @@ func (m LexModeName) validate() error {
return nil
}
-const idPattern = `^[A-Za-z](_?[0-9A-Za-z]+)*$`
+const idPattern = `^[a-z](_?[0-9a-z]+)*$`
var idRE = regexp.MustCompile(idPattern)
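To see exactly what the tightened pattern accepts, here is a minimal standalone sketch; it is not maleeni's API, it just exercises the same regular expression directly:

```go
package main

import (
	"fmt"
	"regexp"
)

// Identical to idPattern above: a lowercase letter first, then runs of
// lowercase letters and digits separated by single underscores.
var idRE = regexp.MustCompile(`^[a-z](_?[0-9a-z]+)*$`)

func main() {
	for _, id := range []string{"foo", "f_o_o", "char_1_byte", "Foo", "foo_Bar", "2foo"} {
		fmt.Printf("%-12s valid: %v\n", id, idRE.MatchString(id))
	}
}
```

`Foo` and `foo_Bar` mirror the cases added to spec_test.go below; both now fail because `A`-`Z` no longer appear in the character classes.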
diff --git a/spec/spec_test.go b/spec/spec_test.go
index 54d7b7d..e0e920e 100644
--- a/spec/spec_test.go
+++ b/spec/spec_test.go
@@ -22,6 +22,14 @@ var idTests = []struct {
id: "f_o_o",
},
{
+ id: "Foo",
+ invalid: true,
+ },
+ {
+ id: "foo_Bar",
+ invalid: true,
+ },
+ {
id: "2foo",
invalid: true,
},