diff options
Diffstat (limited to 'spec')
-rw-r--r-- | spec/lexer.go | 34 | ||||
-rw-r--r-- | spec/lexer_test.go | 34 | ||||
-rw-r--r-- | spec/lexspec.json | 2 | ||||
-rw-r--r-- | spec/syntax_error.go | 15 | ||||
-rw-r--r-- | spec/vartan_lexer.go | 4 |
5 files changed, 76 insertions, 13 deletions
diff --git a/spec/lexer.go b/spec/lexer.go index c1c4b0d..d1f3ae7 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -7,6 +7,7 @@ import ( _ "embed" "fmt" "io" + "regexp" "strings" verr "github.com/nihei9/vartan/error" @@ -33,6 +34,11 @@ const ( tokenKindInvalid = tokenKind("invalid") ) +var ( + reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`) + reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`) +) + type Position struct { Row int Col int @@ -167,9 +173,33 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { case KindIDKwFragment: return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil case KindIDIdentifier: - if strings.HasPrefix(string(tok.Lexeme), "_") { + if !reIDChar.Match(tok.Lexeme) { + return nil, &verr.SpecError{ + Cause: synErrIDInvalidChar, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") { + return nil, &verr.SpecError{ + Cause: synErrIDInvalidUnderscorePos, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + if strings.Contains(string(tok.Lexeme), "__") { + return nil, &verr.SpecError{ + Cause: synErrIDConsecutiveUnderscores, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + if reIDInvalidDigitsPos.Match(tok.Lexeme) { return nil, &verr.SpecError{ - Cause: synErrAutoGenID, + Cause: synErrIDInvalidDigitsPos, Detail: string(tok.Lexeme), Row: tok.Row + 1, Col: tok.Col + 1, diff --git a/spec/lexer_test.go b/spec/lexer_test.go index 0e7cc89..5f555ec 100644 --- a/spec/lexer_test.go +++ b/spec/lexer_test.go @@ -120,9 +120,39 @@ bar // This is the fourth comment. }, }, { - caption: "identifiers beginning with an underscore are not allowed because they are used only auto-generated identifiers", + caption: "an identifier cannot contain the capital-case letters", + src: `Abc`, + err: synErrIDInvalidChar, + }, + { + caption: "an identifier cannot contain the capital-case letters", + src: `Zyx`, + err: synErrIDInvalidChar, + }, + { + caption: "the underscore cannot be placed at the beginning of an identifier", src: `_abc`, - err: synErrAutoGenID, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed at the end of an identifier", + src: `abc_`, + err: synErrIDInvalidUnderscorePos, + }, + { + caption: "the underscore cannot be placed consecutively", + src: `a__b`, + err: synErrIDConsecutiveUnderscores, + }, + { + caption: "the digits cannot be placed at the biginning of an identifier", + src: `0abc`, + err: synErrIDInvalidDigitsPos, + }, + { + caption: "the digits cannot be placed at the biginning of an identifier", + src: `9abc`, + err: synErrIDInvalidDigitsPos, }, { caption: "an unclosed terminal is not a valid token", diff --git a/spec/lexspec.json b/spec/lexspec.json index 7222be0..b8b67f5 100644 --- a/spec/lexspec.json +++ b/spec/lexspec.json @@ -44,7 +44,7 @@ }, { "kind": "identifier", - "pattern": "[A-Za-z_][0-9A-Za-z_]*" + "pattern": "[0-9A-Za-z_]+" }, { "kind": "terminal_open", diff --git a/spec/syntax_error.go b/spec/syntax_error.go index ad847a2..cf64e75 100644 --- a/spec/syntax_error.go +++ b/spec/syntax_error.go @@ -16,12 +16,15 @@ func (e *SyntaxError) Error() string { var ( // lexical errors - synErrAutoGenID = newSyntaxError("you cannot define an identifier beginning with an underscore") - synErrUnclosedTerminal = newSyntaxError("unclosed terminal") - synErrUnclosedString = newSyntaxError("unclosed string") - synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash") - synErrEmptyPattern = newSyntaxError("a pattern must include at least one character") - synErrEmptyString = newSyntaxError("a string must include at least one character") + synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore") + synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier") + synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively") + synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the biginning of an identifier") + synErrUnclosedTerminal = newSyntaxError("unclosed terminal") + synErrUnclosedString = newSyntaxError("unclosed string") + synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash") + synErrEmptyPattern = newSyntaxError("a pattern must include at least one character") + synErrEmptyString = newSyntaxError("a string must include at least one character") // syntax errors synErrInvalidToken = newSyntaxError("invalid token") diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go index 7c0dfd4..f0a8b3e 100644 --- a/spec/vartan_lexer.go +++ b/spec/vartan_lexer.go @@ -646,7 +646,7 @@ func NewLexSpec() *lexSpec { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1, -1, -1, 1, 1, 1, -1, -1, - -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, + -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, @@ -991,7 +991,7 @@ func NewLexSpec() *lexSpec { 6, 6, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 13, 13, 15, 18, 18, 18, 21, 2, 35, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 36, 43, 44, 0, 0, 37, 45, 46, 0, 0, - 0, 0, 33, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 40, 0, 0, 0, 0, + 0, 0, 33, 4, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 38, 40, 0, 0, 0, 0, 41, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 32, 0, 32, 32, 32, 32, 32, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, |