aboutsummaryrefslogtreecommitdiff
path: root/compiler/parser.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-04-30 01:54:02 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-04-30 01:54:02 +0900
commit43fdbf94ad87ea91a173c72688cad70a0a5f1ab4 (patch)
tree655f651e39f13b5e415445d1ef24f4ecb7511041 /compiler/parser.go
parentAdd code point expression (Meet RL1.1 of UTS #18) (diff)
downloadtre-43fdbf94ad87ea91a173c72688cad70a0a5f1ab4.tar.gz
tre-43fdbf94ad87ea91a173c72688cad70a0a5f1ab4.tar.xz
Add character property expression (Meet RL1.2 of UTS #18 partially)
\p{property name=property value} matches a character that has the given property. When the property name is General_Category, it can be omitted; that is, \p{Letter} is equivalent to \p{General_Category=Letter}. Currently, only General_Category is supported, so this feature only partially meets RL1.2 of UTS #18. RL1.2 Properties: https://unicode.org/reports/tr18/#RL1.2
Diffstat (limited to 'compiler/parser.go')
-rw-r--r--compiler/parser.go55
1 files changed, 54 insertions, 1 deletions
diff --git a/compiler/parser.go b/compiler/parser.go
index ba15cd0..7b50459 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -312,6 +312,9 @@ func (p *parser) parseSingleChar() astNode {
if p.consume(tokenKindCodePointLeader) {
return p.parseCodePoint()
}
+ if p.consume(tokenKindCharPropLeader) {
+ return p.parseCharProp()
+ }
c := p.parseNormalChar()
if c == nil {
if p.consume(tokenKindBExpClose) {
@@ -393,12 +396,62 @@ func (p *parser) parseCodePoint() astNode {
return concat
}
+func (p *parser) parseCharProp() astNode {
+ if !p.consume(tokenKindLBrace) {
+ raiseSyntaxError(synErrCharPropExpInvalidForm)
+ }
+ var sym1, sym2 string
+ if !p.consume(tokenKindCharPropSymbol) {
+ raiseSyntaxError(synErrCharPropExpInvalidForm)
+ }
+ sym1 = p.lastTok.propSymbol
+ if p.consume(tokenKindEqual) {
+ if !p.consume(tokenKindCharPropSymbol) {
+ raiseSyntaxError(synErrCharPropExpInvalidForm)
+ }
+ sym2 = p.lastTok.propSymbol
+ }
+
+ var propName, propVal string
+ if sym2 != "" {
+ propName = sym1
+ propVal = sym2
+ } else {
+ propName = "gc"
+ propVal = sym1
+ }
+ cpRanges, err := findCodePointRanges(propName, propVal)
+ if err != nil {
+ p.errMsgDetails = fmt.Sprintf("%v", err)
+ raiseSyntaxError(synErrCharPropUnsupported)
+ }
+
+ var alt astNode
+ for _, r := range cpRanges {
+ from := genNormalCharAST(r.From)
+ to := genNormalCharAST(r.To)
+ alt = genAltNode(
+ alt,
+ genRangeAST(from, to),
+ )
+ }
+
+ if !p.consume(tokenKindRBrace) {
+ raiseSyntaxError(synErrCharPropExpInvalidForm)
+ }
+
+ return alt
+}
+
func (p *parser) parseNormalChar() astNode {
if !p.consume(tokenKindChar) {
return nil
}
+ return genNormalCharAST(p.lastTok.char)
+}
- b := []byte(string(p.lastTok.char))
+func genNormalCharAST(c rune) astNode {
+ b := []byte(string(c))
switch len(b) {
case 1:
return newSymbolNode(b[0])