author | Ryo Nihei <nihei.dev@gmail.com> | 2022-05-09 23:52:56 +0900
---|---|---
committer | Ryo Nihei <nihei.dev@gmail.com> | 2022-05-10 23:14:52 +0900
commit | 054307b6c99ab962cfa9bcb4c4d50f3aea5406ea |
tree | 73aea74559c64f3bcb55f3a39b2399714b616174 |
parent | Change the default suffix of a grammar file from .vr to .vartan |
Make the identifier format strict
-rw-r--r-- | README.md | 6
-rw-r--r-- | driver/lac_test.go | 14
-rw-r--r-- | grammar/grammar_test.go | 2
-rw-r--r-- | grammar/lalr1_test.go | 56
-rw-r--r-- | grammar/lr0_test.go | 11
-rw-r--r-- | grammar/parsing_table_test.go | 62
-rw-r--r-- | spec/lexer.go | 34
-rw-r--r-- | spec/lexer_test.go | 34
-rw-r--r-- | spec/lexspec.json | 2
-rw-r--r-- | spec/syntax_error.go | 15
-rw-r--r-- | spec/vartan_lexer.go | 4
11 files changed, 152 insertions, 88 deletions
@@ -292,9 +292,9 @@ Using the `fragment` keyword, you can also define a fragment that represents a p
 
 An identifier is a string that satisfies all of the following rules:
 
-* Contains only lowercase letters (`a`-`z`), numbers (`0`-`9`), and underscores (`_`).
-* The first letter is only a lowercase letter.
-* The last letter is only a lowercase letter or a number.
+* Contains only the lower-case letters (`a`-`z`), the digits (`0`-`9`), and the underscore (`_`).
+* The first letter is only the lower-case letters.
+* The last letter is only the lower-case letters or the digits.
 
 examples:
 
diff --git a/driver/lac_test.go b/driver/lac_test.go
index 54001d5..3cee765 100644
--- a/driver/lac_test.go
+++ b/driver/lac_test.go
@@ -12,11 +12,11 @@ func TestParserWithLAC(t *testing.T) {
 	specSrc := `
 #name test;
 
-S
-	: C C
+s
+	: t t
 	;
-C
-	: c C
+t
+	: c t
 	| d
 	;
@@ -37,9 +37,9 @@ d: 'd';
 		"shift/c",
 		"shift/c",
 		"shift/d",
-		"reduce/C",
-		"reduce/C",
-		"reduce/C",
+		"reduce/t",
+		"reduce/t",
+		"reduce/t",
 		"miss",
 	}
diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go
index 62823e1..e61e422 100644
--- a/grammar/grammar_test.go
+++ b/grammar/grammar_test.go
@@ -2786,7 +2786,7 @@
 s
 	: foo
 	;
-foo #alias Foo
+foo #alias bar
 	: 'foo';
 `,
 			errs: []*SemanticError{semErrDirInvalidParam},
diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go
index 94dfd65..2f8074b 100644
--- a/grammar/lalr1_test.go
+++ b/grammar/lalr1_test.go
@@ -12,9 +12,9 @@ func TestGenLALR1Automaton(t *testing.T) {
 	src := `
 #name test;
-S: L eq R | R;
-L: ref R | id;
-R: L;
+s: l eq r | r;
+l: ref r | id;
+r: l;
 
 eq: '=';
 ref: '*';
 id: "[A-Za-z0-9_]+";
@@ -66,35 +66,35 @@ id: "[A-Za-z0-9_]+";
 
 	expectedKernels := map[int][]*lrItem{
 		0: {
-			withLookAhead(genLR0Item("S'", 0, "S"), symbolEOF),
+			withLookAhead(genLR0Item("s'", 0, "s"), symbolEOF),
 		},
 		1: {
-			withLookAhead(genLR0Item("S'", 1, "S"), symbolEOF),
+			withLookAhead(genLR0Item("s'", 1, "s"), symbolEOF),
 		},
 		2: {
-			withLookAhead(genLR0Item("S", 1, "L", "eq", "R"), symbolEOF),
-			withLookAhead(genLR0Item("R", 1, "L"), symbolEOF),
+			withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbolEOF),
+			withLookAhead(genLR0Item("r", 1, "l"), symbolEOF),
 		},
 		3: {
-			withLookAhead(genLR0Item("S", 1, "R"), symbolEOF),
+			withLookAhead(genLR0Item("s", 1, "r"), symbolEOF),
 		},
 		4: {
-			withLookAhead(genLR0Item("L", 1, "ref", "R"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbolEOF),
 		},
 		5: {
-			withLookAhead(genLR0Item("L", 1, "id"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbolEOF),
 		},
 		6: {
-			withLookAhead(genLR0Item("S", 2, "L", "eq", "R"), symbolEOF),
+			withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbolEOF),
 		},
 		7: {
-			withLookAhead(genLR0Item("L", 2, "ref", "R"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbolEOF),
 		},
 		8: {
-			withLookAhead(genLR0Item("R", 1, "L"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbolEOF),
 		},
 		9: {
-			withLookAhead(genLR0Item("S", 3, "L", "eq", "R"), symbolEOF),
+			withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbolEOF),
 		},
 	}
 
@@ -102,9 +102,9 @@ id: "[A-Za-z0-9_]+";
 		{
 			kernelItems: expectedKernels[0],
 			nextStates: map[symbol][]*lrItem{
-				genSym("S"):   expectedKernels[1],
-				genSym("L"):   expectedKernels[2],
-				genSym("R"):   expectedKernels[3],
+				genSym("s"):   expectedKernels[1],
+				genSym("l"):   expectedKernels[2],
+				genSym("r"):   expectedKernels[3],
 				genSym("ref"): expectedKernels[4],
 				genSym("id"):  expectedKernels[5],
 			},
@@ -114,7 +114,7 @@ id: "[A-Za-z0-9_]+";
 			kernelItems: expectedKernels[1],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("S'", "S"),
+				genProd("s'", "s"),
 			},
 		},
 		{
@@ -123,21 +123,21 @@ id: "[A-Za-z0-9_]+";
 				genSym("eq"): expectedKernels[6],
 			},
 			reducibleProds: []*production{
-				genProd("R", "L"),
+				genProd("r", "l"),
 			},
 		},
 		{
 			kernelItems: expectedKernels[3],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("S", "R"),
+				genProd("s", "r"),
 			},
 		},
 		{
 			kernelItems: expectedKernels[4],
 			nextStates: map[symbol][]*lrItem{
-				genSym("R"):   expectedKernels[7],
-				genSym("L"):   expectedKernels[8],
+				genSym("r"):   expectedKernels[7],
+				genSym("l"):   expectedKernels[8],
 				genSym("ref"): expectedKernels[4],
 				genSym("id"):  expectedKernels[5],
 			},
@@ -147,14 +147,14 @@ id: "[A-Za-z0-9_]+";
 			kernelItems: expectedKernels[5],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("L", "id"),
+				genProd("l", "id"),
 			},
 		},
 		{
 			kernelItems: expectedKernels[6],
 			nextStates: map[symbol][]*lrItem{
-				genSym("R"):   expectedKernels[9],
-				genSym("L"):   expectedKernels[8],
+				genSym("r"):   expectedKernels[9],
+				genSym("l"):   expectedKernels[8],
 				genSym("ref"): expectedKernels[4],
 				genSym("id"):  expectedKernels[5],
 			},
@@ -164,21 +164,21 @@ id: "[A-Za-z0-9_]+";
 			kernelItems: expectedKernels[7],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("L", "ref", "R"),
+				genProd("l", "ref", "r"),
 			},
 		},
 		{
 			kernelItems: expectedKernels[8],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("R", "L"),
+				genProd("r", "l"),
 			},
 		},
 		{
 			kernelItems: expectedKernels[9],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("S", "L", "eq", "R"),
+				genProd("s", "l", "eq", "r"),
 			},
 		},
 	}
diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go
index 1b20d78..b31126e 100644
--- a/grammar/lr0_test.go
+++ b/grammar/lr0_test.go
@@ -236,10 +236,11 @@
 foo
 	:
 	;
 bar
-	: BAR
+	: b
 	|
 	;
-BAR: "bar";
+
+b: "bar";
 `
 	var gram *Grammar
@@ -290,7 +291,7 @@ BAR: "bar";
 			genLR0Item("s", 2, "foo", "bar"),
 		},
 		4: {
-			genLR0Item("bar", 1, "BAR"),
+			genLR0Item("bar", 1, "b"),
 		},
 	}
 
@@ -319,7 +320,7 @@ BAR: "bar";
 			kernelItems: expectedKernels[2],
 			nextStates: map[symbol][]*lrItem{
 				genSym("bar"): expectedKernels[3],
-				genSym("BAR"): expectedKernels[4],
+				genSym("b"):   expectedKernels[4],
 			},
 			reducibleProds: []*production{
 				genProd("bar"),
@@ -339,7 +340,7 @@ BAR: "bar";
 			kernelItems: expectedKernels[4],
 			nextStates:  map[symbol][]*lrItem{},
 			reducibleProds: []*production{
-				genProd("bar", "BAR"),
+				genProd("bar", "b"),
 			},
 		},
 	}
diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go
index 522ec1c..4ce8455 100644
--- a/grammar/parsing_table_test.go
+++ b/grammar/parsing_table_test.go
@@ -18,9 +18,9 @@ func TestGenLALRParsingTable(t *testing.T) {
 	src := `
 #name test;
-S: L eq R | R;
-L: ref R | id;
-R: L;
+s: l eq r | r;
+l: ref r | id;
+r: l;
 
 eq: '=';
 ref: '*';
 id: "[A-Za-z0-9_]+";
@@ -89,35 +89,35 @@ id: "[A-Za-z0-9_]+";
 
 	expectedKernels := map[int][]*lrItem{
 		0: {
-			withLookAhead(genLR0Item("S'", 0, "S"), symbolEOF),
+			withLookAhead(genLR0Item("s'", 0, "s"), symbolEOF),
 		},
 		1: {
-			withLookAhead(genLR0Item("S'", 1, "S"), symbolEOF),
+			withLookAhead(genLR0Item("s'", 1, "s"), symbolEOF),
 		},
 		2: {
-			withLookAhead(genLR0Item("S", 1, "L", "eq", "R"), symbolEOF),
-			withLookAhead(genLR0Item("R", 1, "L"), symbolEOF),
+			withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbolEOF),
+			withLookAhead(genLR0Item("r", 1, "l"), symbolEOF),
 		},
 		3: {
-			withLookAhead(genLR0Item("S", 1, "R"), symbolEOF),
+			withLookAhead(genLR0Item("s", 1, "r"), symbolEOF),
 		},
 		4: {
-			withLookAhead(genLR0Item("L", 1, "ref", "R"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbolEOF),
 		},
 		5: {
-			withLookAhead(genLR0Item("L", 1, "id"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbolEOF),
 		},
 		6: {
-			withLookAhead(genLR0Item("S", 2, "L", "eq", "R"), symbolEOF),
+			withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbolEOF),
 		},
 		7: {
-			withLookAhead(genLR0Item("L", 2, "ref", "R"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbolEOF),
 		},
 		8: {
-			withLookAhead(genLR0Item("R", 1, "L"), genSym("eq"), symbolEOF),
+			withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbolEOF),
 		},
 		9: {
-			withLookAhead(genLR0Item("S", 3, "L", "eq", "R"), symbolEOF),
+			withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbolEOF),
 		},
 	}
 
@@ -135,9 +135,9 @@ id: "[A-Za-z0-9_]+";
 				},
 			},
 			goTos: map[symbol][]*lrItem{
-				genSym("S"): expectedKernels[1],
-				genSym("L"): expectedKernels[2],
-				genSym("R"): expectedKernels[3],
+				genSym("s"): expectedKernels[1],
+				genSym("l"): expectedKernels[2],
+				genSym("r"): expectedKernels[3],
 			},
 		},
 		{
@@ -145,7 +145,7 @@ id: "[A-Za-z0-9_]+";
 			acts: map[symbol]testActionEntry{
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("S'", "S"),
+					production: genProd("s'", "s"),
 				},
 			},
 		},
@@ -158,7 +158,7 @@ id: "[A-Za-z0-9_]+";
 				},
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("R", "L"),
+					production: genProd("r", "l"),
 				},
 			},
 		},
@@ -167,7 +167,7 @@ id: "[A-Za-z0-9_]+";
 			acts: map[symbol]testActionEntry{
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("S", "R"),
+					production: genProd("s", "r"),
 				},
 			},
 		},
@@ -184,8 +184,8 @@ id: "[A-Za-z0-9_]+";
 				},
 			},
 			goTos: map[symbol][]*lrItem{
-				genSym("R"): expectedKernels[7],
-				genSym("L"): expectedKernels[8],
+				genSym("r"): expectedKernels[7],
+				genSym("l"): expectedKernels[8],
 			},
 		},
 		{
@@ -193,11 +193,11 @@ id: "[A-Za-z0-9_]+";
 			acts: map[symbol]testActionEntry{
 				genSym("eq"): {
 					ty:         ActionTypeReduce,
-					production: genProd("L", "id"),
+					production: genProd("l", "id"),
 				},
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("L", "id"),
+					production: genProd("l", "id"),
 				},
 			},
 		},
@@ -214,8 +214,8 @@ id: "[A-Za-z0-9_]+";
 				},
 			},
 			goTos: map[symbol][]*lrItem{
-				genSym("L"): expectedKernels[8],
-				genSym("R"): expectedKernels[9],
+				genSym("l"): expectedKernels[8],
+				genSym("r"): expectedKernels[9],
 			},
 		},
 		{
@@ -223,11 +223,11 @@ id: "[A-Za-z0-9_]+";
 			acts: map[symbol]testActionEntry{
 				genSym("eq"): {
 					ty:         ActionTypeReduce,
-					production: genProd("L", "ref", "R"),
+					production: genProd("l", "ref", "r"),
 				},
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("L", "ref", "R"),
+					production: genProd("l", "ref", "r"),
 				},
 			},
 		},
@@ -236,11 +236,11 @@ id: "[A-Za-z0-9_]+";
 			acts: map[symbol]testActionEntry{
 				genSym("eq"): {
 					ty:         ActionTypeReduce,
-					production: genProd("R", "L"),
+					production: genProd("r", "l"),
 				},
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("R", "L"),
+					production: genProd("r", "l"),
 				},
 			},
 		},
@@ -249,7 +249,7 @@ id: "[A-Za-z0-9_]+";
 			acts: map[symbol]testActionEntry{
 				symbolEOF: {
 					ty:         ActionTypeReduce,
-					production: genProd("S", "L", "eq", "R"),
+					production: genProd("s", "l", "eq", "r"),
 				},
 			},
 		},
diff --git a/spec/lexer.go b/spec/lexer.go
index c1c4b0d..d1f3ae7 100644
--- a/spec/lexer.go
+++ b/spec/lexer.go
@@ -7,6 +7,7 @@ import (
 	_ "embed"
 	"fmt"
 	"io"
+	"regexp"
 	"strings"
 
 	verr "github.com/nihei9/vartan/error"
@@ -33,6 +34,11 @@ const (
 	tokenKindInvalid = tokenKind("invalid")
 )
 
+var (
+	reIDChar             = regexp.MustCompile(`^[0-9a-z_]+$`)
+	reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`)
+)
+
 type Position struct {
 	Row int
 	Col int
@@ -167,9 +173,33 @@ func (l *lexer) lexAndSkipWSs() (*token, error) {
 		case KindIDKwFragment:
 			return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil
 		case KindIDIdentifier:
-			if strings.HasPrefix(string(tok.Lexeme), "_") {
+			if !reIDChar.Match(tok.Lexeme) {
+				return nil, &verr.SpecError{
+					Cause:  synErrIDInvalidChar,
+					Detail: string(tok.Lexeme),
+					Row:    tok.Row + 1,
+					Col:    tok.Col + 1,
+				}
+			}
+			if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") {
+				return nil, &verr.SpecError{
+					Cause:  synErrIDInvalidUnderscorePos,
+					Detail: string(tok.Lexeme),
+					Row:    tok.Row + 1,
+					Col:    tok.Col + 1,
+				}
+			}
+			if strings.Contains(string(tok.Lexeme), "__") {
+				return nil, &verr.SpecError{
+					Cause:  synErrIDConsecutiveUnderscores,
+					Detail: string(tok.Lexeme),
+					Row:    tok.Row + 1,
+					Col:    tok.Col + 1,
+				}
+			}
+			if reIDInvalidDigitsPos.Match(tok.Lexeme) {
 				return nil, &verr.SpecError{
-					Cause:  synErrAutoGenID,
+					Cause:  synErrIDInvalidDigitsPos,
 					Detail: string(tok.Lexeme),
 					Row:    tok.Row + 1,
 					Col:    tok.Col + 1,
diff --git a/spec/lexer_test.go b/spec/lexer_test.go
index 0e7cc89..5f555ec 100644
--- a/spec/lexer_test.go
+++ b/spec/lexer_test.go
@@ -120,9 +120,39 @@ bar // This is the fourth comment.
 			},
 		},
 		{
-			caption: "identifiers beginning with an underscore are not allowed because they are used only auto-generated identifiers",
+			caption: "an identifier cannot contain the capital-case letters",
+			src:     `Abc`,
+			err:     synErrIDInvalidChar,
+		},
+		{
+			caption: "an identifier cannot contain the capital-case letters",
+			src:     `Zyx`,
+			err:     synErrIDInvalidChar,
+		},
+		{
+			caption: "the underscore cannot be placed at the beginning of an identifier",
 			src:     `_abc`,
-			err:     synErrAutoGenID,
+			err:     synErrIDInvalidUnderscorePos,
+		},
+		{
+			caption: "the underscore cannot be placed at the end of an identifier",
+			src:     `abc_`,
+			err:     synErrIDInvalidUnderscorePos,
+		},
+		{
+			caption: "the underscore cannot be placed consecutively",
+			src:     `a__b`,
+			err:     synErrIDConsecutiveUnderscores,
+		},
+		{
+			caption: "the digits cannot be placed at the biginning of an identifier",
+			src:     `0abc`,
+			err:     synErrIDInvalidDigitsPos,
+		},
+		{
+			caption: "the digits cannot be placed at the biginning of an identifier",
+			src:     `9abc`,
+			err:     synErrIDInvalidDigitsPos,
 		},
 		{
 			caption: "an unclosed terminal is not a valid token",
diff --git a/spec/lexspec.json b/spec/lexspec.json
index 7222be0..b8b67f5 100644
--- a/spec/lexspec.json
+++ b/spec/lexspec.json
@@ -44,7 +44,7 @@
         },
         {
             "kind": "identifier",
-            "pattern": "[A-Za-z_][0-9A-Za-z_]*"
+            "pattern": "[0-9A-Za-z_]+"
         },
         {
             "kind": "terminal_open",
diff --git a/spec/syntax_error.go b/spec/syntax_error.go
index ad847a2..cf64e75 100644
--- a/spec/syntax_error.go
+++ b/spec/syntax_error.go
@@ -16,12 +16,15 @@ func (e *SyntaxError) Error() string {
 
 var (
 	// lexical errors
-	synErrAutoGenID         = newSyntaxError("you cannot define an identifier beginning with an underscore")
-	synErrUnclosedTerminal  = newSyntaxError("unclosed terminal")
-	synErrUnclosedString    = newSyntaxError("unclosed string")
-	synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash")
-	synErrEmptyPattern      = newSyntaxError("a pattern must include at least one character")
-	synErrEmptyString       = newSyntaxError("a string must include at least one character")
+	synErrIDInvalidChar            = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore")
+	synErrIDInvalidUnderscorePos   = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier")
+	synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively")
+	synErrIDInvalidDigitsPos       = newSyntaxError("the digits cannot be placed at the biginning of an identifier")
+	synErrUnclosedTerminal         = newSyntaxError("unclosed terminal")
+	synErrUnclosedString           = newSyntaxError("unclosed string")
+	synErrIncompletedEscSeq        = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash")
+	synErrEmptyPattern             = newSyntaxError("a pattern must include at least one character")
+	synErrEmptyString              = newSyntaxError("a string must include at least one character")
 
 	// syntax errors
 	synErrInvalidToken = newSyntaxError("invalid token")
diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go
index 7c0dfd4..f0a8b3e 100644
--- a/spec/vartan_lexer.go
+++ b/spec/vartan_lexer.go
@@ -646,7 +646,7 @@ func NewLexSpec() *lexSpec {
 		5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 		5, 5, 5, 5, 5, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
 		-1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1, -1, -1, 1, 1, 1, -1, -1,
-		-1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1,
+		-1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1,
 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
 		1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1,
 		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1,
@@ -991,7 +991,7 @@ func NewLexSpec() *lexSpec {
 		6, 6, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 13, 13,
 		15, 18, 18, 18, 21, 2, 35, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 		0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 36, 43, 44, 0, 0, 37, 45, 46, 0, 0,
-		0, 0, 33, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 40, 0, 0, 0, 0,
+		0, 0, 33, 4, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 38, 40, 0, 0, 0, 0,
 		41, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
 		32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 32, 0, 32, 32, 32, 32, 32, 24, 32,
 		32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0,