diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-05-25 21:55:17 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-05-25 21:57:45 +0900 |
commit | 520bf02582be7ab36b17fd78f8931cfdb702b07f (patch) | |
tree | a1e7ad54915152fce6f96a18312e28f34f256c84 /compiler/lexer.go | |
parent | Fix the initial state number (diff) | |
download | tre-520bf02582be7ab36b17fd78f8931cfdb702b07f.tar.gz tre-520bf02582be7ab36b17fd78f8931cfdb702b07f.tar.xz |
Add fragment expression
A fragment entry is an entry whose `fragment` field is `true`; it is referenced by a fragment expression (`\f{...}`).
Diffstat (limited to 'compiler/lexer.go')
-rw-r--r-- | compiler/lexer.go | 74 |
1 file changed, 70 insertions, 4 deletions
diff --git a/compiler/lexer.go b/compiler/lexer.go index 1aadf50..4f6fdac 100644 --- a/compiler/lexer.go +++ b/compiler/lexer.go @@ -24,19 +24,22 @@ const ( tokenKindCharRange = tokenKind("-") tokenKindCodePointLeader = tokenKind("\\u") tokenKindCharPropLeader = tokenKind("\\p") + tokenKindFragmentLeader = tokenKind("\\f") tokenKindLBrace = tokenKind("{") tokenKindRBrace = tokenKind("}") tokenKindEqual = tokenKind("=") tokenKindCodePoint = tokenKind("code point") tokenKindCharPropSymbol = tokenKind("character property symbol") + tokenKindFragmentSymbol = tokenKind("fragment symbol") tokenKindEOF = tokenKind("eof") ) type token struct { - kind tokenKind - char rune - propSymbol string - codePoint string + kind tokenKind + char rune + propSymbol string + codePoint string + fragmentSymbol string } const nullChar = '\u0000' @@ -62,6 +65,13 @@ func newCharPropSymbolToken(propSymbol string) *token { } } +func newFragmentSymbolToken(fragmentSymbol string) *token { + return &token{ + kind: tokenKindFragmentSymbol, + fragmentSymbol: fragmentSymbol, + } +} + type lexerMode string const ( @@ -69,6 +79,7 @@ const ( lexerModeBExp = lexerMode("bracket expression") lexerModeCPExp = lexerMode("code point expression") lexerModeCharPropExp = lexerMode("character property expression") + lexerModeFragmentExp = lexerMode("fragment expression") ) type lexerModeStack struct { @@ -197,6 +208,16 @@ func (l *lexer) next() (*token, error) { l.modeStack.pop() } return tok, nil + case lexerModeFragmentExp: + tok, err := l.nextInFragment(c) + if err != nil { + return nil, err + } + switch tok.kind { + case tokenKindRBrace: + l.modeStack.pop() + } + return tok, nil default: tok, err := l.nextInDefault(c) if err != nil { @@ -213,6 +234,8 @@ func (l *lexer) next() (*token, error) { l.modeStack.push(lexerModeCPExp) case tokenKindCharPropLeader: l.modeStack.push(lexerModeCharPropExp) + case tokenKindFragmentLeader: + l.modeStack.push(lexerModeFragmentExp) } return tok, nil } @@ -294,6 +317,9 @@ func 
(l *lexer) nextInDefault(c rune) (*token, error) { if c == 'p' { return newToken(tokenKindCharPropLeader, nullChar), nil } + if c == 'f' { + return newToken(tokenKindFragmentLeader, nullChar), nil + } if c == '\\' || c == '.' || c == '*' || c == '+' || c == '?' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']' { return newToken(tokenKindChar, c), nil } @@ -455,6 +481,46 @@ func (l *lexer) nextInCharProp(c rune) (*token, error) { } } +func (l *lexer) nextInFragment(c rune) (*token, error) { + switch c { + case '{': + return newToken(tokenKindLBrace, nullChar), nil + case '}': + return newToken(tokenKindRBrace, nullChar), nil + default: + var b strings.Builder + fmt.Fprint(&b, string(c)) + n := 1 + for { + c, eof, err := l.read() + if err != nil { + return nil, err + } + if eof { + l.restore() + if err != nil { + return nil, err + } + break + } + if c == '}' { + err := l.restore() + if err != nil { + return nil, err + } + break + } + fmt.Fprint(&b, string(c)) + n++ + } + sym := strings.TrimSpace(b.String()) + if len(sym) == 0 { + raiseSyntaxError(SynErrFragmentInvalidSymbol) + } + return newFragmentSymbolToken(sym), nil + } +} + func (l *lexer) read() (rune, bool, error) { if l.reachedEOF { return l.lastChar, l.reachedEOF, nil |