aboutsummaryrefslogtreecommitdiff
path: root/compiler/lexer.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-04-11 16:41:05 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-04-11 16:41:05 +0900
commita13655e3936ba9166051914832dedcbdb28b056c (patch)
treec6fcd27c4769aecf16d4f8d2a43f1297f62b6caa /compiler/lexer.go
parentAdd logging to compile command (diff)
downloadtre-a13655e3936ba9166051914832dedcbdb28b056c.tar.gz
tre-a13655e3936ba9166051914832dedcbdb28b056c.tar.xz
Fix grammar the parser accepts
* Add cases test the parse method. * Fix the parser to pass the cases.
Diffstat (limited to 'compiler/lexer.go')
-rw-r--r--compiler/lexer.go78
1 files changed, 73 insertions, 5 deletions
diff --git a/compiler/lexer.go b/compiler/lexer.go
index dba667f..19c7105 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -45,6 +45,20 @@ const (
lexerModeBExp = lexerMode("bracket expression")
)
+type rangeState string
+
+// [a-z]
+// ^^^^
+// |||`-- ready
+// ||`-- expect range terminator
+// |`-- read range initiator
+// `-- ready
+const (
+ rangeStateReady = rangeState("ready")
+ rangeStateReadRangeInitiator = rangeState("read range initiator")
+ rangeStateExpectRangeTerminator = rangeState("expect range terminator")
+)
+
type lexer struct {
src *bufio.Reader
peekChar2 rune
@@ -58,6 +72,7 @@ type lexer struct {
prevChar2 rune
pervEOF2 bool
mode lexerMode
+ rangeState rangeState
}
func newLexer(src io.Reader) *lexer {
@@ -74,6 +89,7 @@ func newLexer(src io.Reader) *lexer {
prevChar2: nullChar,
pervEOF2: false,
mode: lexerModeDefault,
+ rangeState: rangeStateReady,
}
}
@@ -88,9 +104,38 @@ func (l *lexer) next() (*token, error) {
switch l.mode {
case lexerModeBExp:
- return l.nextInBExp(c)
+ tok, err := l.nextInBExp(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindBExpClose:
+ l.mode = lexerModeDefault
+ case tokenKindCharRange:
+ l.rangeState = rangeStateExpectRangeTerminator
+ case tokenKindChar:
+ switch l.rangeState {
+ case rangeStateReady:
+ l.rangeState = rangeStateReadRangeInitiator
+ case rangeStateExpectRangeTerminator:
+ l.rangeState = rangeStateReady
+ }
+ }
+ return tok, nil
default:
- return l.nextInDefault(c)
+ tok, err := l.nextInDefault(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindBExpOpen:
+ l.mode = lexerModeBExp
+ l.rangeState = rangeStateReady
+ case tokenKindInverseBExpOpen:
+ l.mode = lexerModeBExp
+ l.rangeState = rangeStateReady
+ }
+ return tok, nil
}
}
@@ -111,7 +156,6 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
case ')':
return newToken(tokenKindGroupClose, nullChar), nil
case '[':
- l.mode = lexerModeBExp
c1, eof, err := l.read()
if err != nil {
return nil, err
@@ -185,9 +229,33 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
func (l *lexer) nextInBExp(c rune) (*token, error) {
switch c {
case '-':
- return newToken(tokenKindCharRange, nullChar), nil
+ if l.rangeState != rangeStateReadRangeInitiator {
+ return newToken(tokenKindChar, c), nil
+ }
+ c1, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindCharRange, nullChar), nil
+ }
+ if c1 != ']' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindCharRange, nullChar), nil
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindChar, c), nil
case ']':
- l.mode = lexerModeDefault
return newToken(tokenKindBExpClose, nullChar), nil
case '\\':
c, eof, err := l.read()