aboutsummaryrefslogtreecommitdiff
path: root/compiler/lexer.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-04-01 21:03:40 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-04-01 21:03:40 +0900
commit69643f8e38ee74406365704f73be4d5f865af8da (patch)
treefca8d8371527a59f09778fa124d4b90addca6c88 /compiler/lexer.go
parentPass values in error type to panic() (diff)
downloadtre-69643f8e38ee74406365704f73be4d5f865af8da.tar.gz
tre-69643f8e38ee74406365704f73be4d5f865af8da.tar.xz
Add logical inverse expression
[^a-z] matches any character that is not in the range a-z.
Diffstat (limited to 'compiler/lexer.go')
-rw-r--r--compiler/lexer.go118
1 files changed, 101 insertions, 17 deletions
diff --git a/compiler/lexer.go b/compiler/lexer.go
index acc9658..dba667f 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -18,6 +18,7 @@ const (
tokenKindGroupOpen = tokenKind("(")
tokenKindGroupClose = tokenKind(")")
tokenKindBExpOpen = tokenKind("[")
+ tokenKindInverseBExpOpen = tokenKind("[^")
tokenKindBExpClose = tokenKind("]")
tokenKindCharRange = tokenKind("-")
tokenKindEOF = tokenKind("eof")
@@ -46,18 +47,32 @@ const (
type lexer struct {
src *bufio.Reader
+ peekChar2 rune
+ peekEOF2 bool
+ peekChar1 rune
+ peekEOF1 bool
lastChar rune
- prevChar rune
reachedEOF bool
+ prevChar1 rune
+ prevEOF1 bool
+ prevChar2 rune
+ pervEOF2 bool
mode lexerMode
}
func newLexer(src io.Reader) *lexer {
return &lexer{
src: bufio.NewReader(src),
+ peekChar2: nullChar,
+ peekEOF2: false,
+ peekChar1: nullChar,
+ peekEOF1: false,
lastChar: nullChar,
- prevChar: nullChar,
reachedEOF: false,
+ prevChar1: nullChar,
+ prevEOF1: false,
+ prevChar2: nullChar,
+ pervEOF2: false,
mode: lexerModeDefault,
}
}
@@ -97,6 +112,50 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
return newToken(tokenKindGroupClose, nullChar), nil
case '[':
l.mode = lexerModeBExp
+ c1, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindBExpOpen, nullChar), nil
+ }
+ if c1 != '^' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindBExpOpen, nullChar), nil
+ }
+ c2, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindInverseBExpOpen, nullChar), nil
+ }
+ if c2 != ']' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindInverseBExpOpen, nullChar), nil
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
return newToken(tokenKindBExpOpen, nullChar), nil
case ']':
return newToken(tokenKindBExpClose, nullChar), nil
@@ -141,7 +200,7 @@ func (l *lexer) nextInBExp(c rune) (*token, error) {
}
}
switch {
- case c == '\\' || c == '-' || c == ']':
+ case c == '\\' || c == '^' || c == '-' || c == ']':
return newToken(tokenKindChar, c), nil
default:
return nil, &SyntaxError{
@@ -154,32 +213,57 @@ func (l *lexer) nextInBExp(c rune) (*token, error) {
}
func (l *lexer) read() (rune, bool, error) {
+ if l.reachedEOF {
+ return l.lastChar, l.reachedEOF, nil
+ }
+ if l.peekChar1 != nullChar || l.peekEOF1 {
+ l.prevChar2 = l.prevChar1
+ l.pervEOF2 = l.prevEOF1
+ l.prevChar1 = l.lastChar
+ l.prevEOF1 = l.reachedEOF
+ l.lastChar = l.peekChar1
+ l.reachedEOF = l.peekEOF1
+ l.peekChar1 = l.peekChar2
+ l.peekEOF1 = l.peekEOF2
+ l.peekChar2 = nullChar
+ l.peekEOF2 = false
+ return l.lastChar, l.reachedEOF, nil
+ }
c, _, err := l.src.ReadRune()
if err != nil {
if err == io.EOF {
- l.prevChar = l.lastChar
+ l.prevChar2 = l.prevChar1
+ l.pervEOF2 = l.prevEOF1
+ l.prevChar1 = l.lastChar
+ l.prevEOF1 = l.reachedEOF
l.lastChar = nullChar
l.reachedEOF = true
- return nullChar, true, nil
+ return l.lastChar, l.reachedEOF, nil
}
return nullChar, false, err
}
- l.prevChar = l.lastChar
+ l.prevChar2 = l.prevChar1
+ l.pervEOF2 = l.prevEOF1
+ l.prevChar1 = l.lastChar
+ l.prevEOF1 = l.reachedEOF
l.lastChar = c
- return c, false, nil
+ l.reachedEOF = false
+ return l.lastChar, l.reachedEOF, nil
}
func (l *lexer) restore() error {
- if l.reachedEOF {
- l.lastChar = l.prevChar
- l.prevChar = nullChar
- l.reachedEOF = false
- return l.src.UnreadRune()
- }
- if l.lastChar == nullChar {
+ if l.lastChar == nullChar && !l.reachedEOF {
return fmt.Errorf("the lexer failed to call restore() because the last character is null")
}
- l.lastChar = l.prevChar
- l.prevChar = nullChar
- return l.src.UnreadRune()
+ l.peekChar2 = l.peekChar1
+ l.peekEOF2 = l.peekEOF1
+ l.peekChar1 = l.lastChar
+ l.peekEOF1 = l.reachedEOF
+ l.lastChar = l.prevChar1
+ l.reachedEOF = l.prevEOF1
+ l.prevChar1 = l.prevChar2
+ l.prevEOF1 = l.pervEOF2
+ l.prevChar2 = nullChar
+ l.pervEOF2 = false
+ return nil
}