aboutsummaryrefslogtreecommitdiff
path: root/compiler/lexer.go
diff options
context:
space:
mode:
author Ryo Nihei <nihei.dev@gmail.com> 2021-02-14 20:19:22 +0900
committer Ryo Nihei <nihei.dev@gmail.com> 2021-02-14 20:19:22 +0900
commit 467f223668d13ffa42679e6c928d82d5d402d87d (patch)
tree b518082b8af4d79b19b41e8beacc085d6f0cf84f /compiler/lexer.go
parent Add dot symbol matching any single character (diff)
download tre-467f223668d13ffa42679e6c928d82d5d402d87d.tar.gz
tre-467f223668d13ffa42679e6c928d82d5d402d87d.tar.xz
Add bracket expression matching specified character
A bracket expression matches any single character listed inside it. Within a bracket expression, special characters such as . and * are treated as ordinary characters.
Diffstat (limited to 'compiler/lexer.go')
-rw-r--r-- compiler/lexer.go 55
1 file changed, 54 insertions, 1 deletion
diff --git a/compiler/lexer.go b/compiler/lexer.go
index 1c09260..3e3cf35 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -15,6 +15,8 @@ const (
tokenKindAlt = tokenKind("|")
tokenKindGroupOpen = tokenKind("(")
tokenKindGroupClose = tokenKind(")")
+ tokenKindBExpOpen = tokenKind("[")
+ tokenKindBExpClose = tokenKind("]")
tokenKindEOF = tokenKind("eof")
)
@@ -32,11 +34,19 @@ func newToken(kind tokenKind, char rune) *token {
}
}
+type lexerMode string
+
+const (
+ lexerModeDefault = lexerMode("default")
+ lexerModeBExp = lexerMode("bracket expression")
+)
+
type lexer struct {
src *bufio.Reader
lastChar rune
prevChar rune
reachedEOF bool
+ mode lexerMode
}
func newLexer(src io.Reader) *lexer {
@@ -45,6 +55,7 @@ func newLexer(src io.Reader) *lexer {
lastChar: nullChar,
prevChar: nullChar,
reachedEOF: false,
+ mode: lexerModeDefault,
}
}
@@ -57,6 +68,15 @@ func (l *lexer) next() (*token, error) {
return newToken(tokenKindEOF, nullChar), nil
}
+ switch l.mode {
+ case lexerModeBExp:
+ return l.nextInBExp(c)
+ default:
+ return l.nextInDefault(c)
+ }
+}
+
+func (l *lexer) nextInDefault(c rune) (*token, error) {
switch c {
case '*':
return newToken(tokenKindRepeat, nullChar), nil
@@ -68,6 +88,39 @@ func (l *lexer) next() (*token, error) {
return newToken(tokenKindGroupOpen, nullChar), nil
case ')':
return newToken(tokenKindGroupClose, nullChar), nil
+ case '[':
+ l.mode = lexerModeBExp
+ return newToken(tokenKindBExpOpen, nullChar), nil
+ case ']':
+ return newToken(tokenKindBExpClose, nullChar), nil
+ case '\\':
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ return nil, &SyntaxError{
+ message: "incompleted escape sequence; unexpected EOF follows \\ character",
+ }
+ }
+ switch {
+ case c == '\\' || c == '.' || c == '*' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']':
+ return newToken(tokenKindChar, c), nil
+ default:
+ return nil, &SyntaxError{
+ message: fmt.Sprintf("invalid escape sequence '\\%s'", string(c)),
+ }
+ }
+ default:
+ return newToken(tokenKindChar, c), nil
+ }
+}
+
+func (l *lexer) nextInBExp(c rune) (*token, error) {
+ switch c {
+ case ']':
+ l.mode = lexerModeDefault
+ return newToken(tokenKindBExpClose, nullChar), nil
case '\\':
c, eof, err := l.read()
if err != nil {
@@ -79,7 +132,7 @@ func (l *lexer) next() (*token, error) {
}
}
switch {
- case c == '\\' || c == '.' || c == '*' || c == '|' || c == '(' || c == ')':
+ case c == '\\' || c == ']':
return newToken(tokenKindChar, c), nil
default:
return nil, &SyntaxError{