about summary refs log tree commit diff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r-- compiler/parser.go 30
-rw-r--r-- compiler/ucd.go 53
-rw-r--r-- compiler/ucd_table.go 31
-rw-r--r-- compiler/ucd_table.go.tmpl 21
4 files changed, 110 insertions, 25 deletions
diff --git a/compiler/parser.go b/compiler/parser.go
index 55b8238..dd73c28 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -546,20 +546,38 @@ func (p *parser) parseCharProp() astNode {
propName = "gc"
propVal = sym1
}
- cpRanges, err := findCodePointRanges(propName, propVal)
+ cpRanges, inverse, err := findCodePointRanges(propName, propVal)
if err != nil {
p.errMsgDetails = fmt.Sprintf("%v", err)
raiseSyntaxError(synErrCharPropUnsupported)
}
var alt astNode
- for _, r := range cpRanges {
+ if inverse {
+ r := cpRanges[0]
from := genNormalCharAST(r.From)
to := genNormalCharAST(r.To)
- alt = genAltNode(
- alt,
- genRangeAST(from, to),
- )
+ alt = exclude(genRangeAST(from, to), genAnyCharAST())
+ if alt == nil {
+ panic(fmt.Errorf("a pattern that isn't matching any symbols"))
+ }
+ for _, r := range cpRanges[1:] {
+ from := genNormalCharAST(r.From)
+ to := genNormalCharAST(r.To)
+ alt = exclude(genRangeAST(from, to), alt)
+ if alt == nil {
+ panic(fmt.Errorf("a pattern that isn't matching any symbols"))
+ }
+ }
+ } else {
+ for _, r := range cpRanges {
+ from := genNormalCharAST(r.From)
+ to := genNormalCharAST(r.To)
+ alt = genAltNode(
+ alt,
+ genRangeAST(from, to),
+ )
+ }
}
if !p.consume(tokenKindRBrace) {
diff --git a/compiler/ucd.go b/compiler/ucd.go
index 506f03a..5ad0986 100644
--- a/compiler/ucd.go
+++ b/compiler/ucd.go
@@ -9,28 +9,43 @@ import (
"github.com/nihei9/maleeni/ucd"
)
-func findCodePointRanges(propName, propVal string) ([]*ucd.CodePointRange, error) {
- name := ucd.NormalizeSymbolicValue(propName)
- val := ucd.NormalizeSymbolicValue(propVal)
- name, ok := propertyNameAbbs[name]
+func findCodePointRanges(propName, propVal string) ([]*ucd.CodePointRange, bool, error) {
+ name, ok := propertyNameAbbs[ucd.NormalizeSymbolicValue(propName)]
if !ok {
- return nil, fmt.Errorf("unsupported character property: %v", propName)
+ return nil, false, fmt.Errorf("unsupported character property name: %v", propName)
}
- val, ok = generalCategoryValueAbbs[val]
- if !ok {
- return nil, fmt.Errorf("unsupported character property value: %v", val)
- }
- vals, ok := compositGeneralCategories[val]
- if !ok {
- vals = []string{val}
- }
- var ranges []*ucd.CodePointRange
- for _, v := range vals {
- rs, ok := generalCategoryCodePoints[v]
+ switch name {
+ case "gc":
+ val, ok := generalCategoryValueAbbs[ucd.NormalizeSymbolicValue(propVal)]
+ if !ok {
+ return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
+ }
+ vals, ok := compositGeneralCategories[val]
if !ok {
- return nil, fmt.Errorf("invalie value of the General_Category property: %v", v)
+ vals = []string{val}
+ }
+ var ranges []*ucd.CodePointRange
+ for _, v := range vals {
+ rs, ok := generalCategoryCodePoints[v]
+ if !ok {
+ return nil, false, fmt.Errorf("invalid value of the General_Category property: %v", v)
+ }
+ ranges = append(ranges, rs...)
+ }
+ return ranges, false, nil
+ case "wspace":
+ yes, ok := binaryValues[ucd.NormalizeSymbolicValue(propVal)]
+ if !ok {
+ return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
+ }
+ if yes {
+ return whiteSpaceCodePoints, false, nil
+ } else {
+ return whiteSpaceCodePoints, true, nil
}
- ranges = append(ranges, rs...)
}
- return ranges, nil
+
+ // Reaching this point is a bug: every property registered in `propertyNameAbbs` must be handled by
+ // the switch above.
+ return nil, false, fmt.Errorf("character property '%v' is unavailable", propName)
}
diff --git a/compiler/ucd_table.go b/compiler/ucd_table.go
index c941f4c..4c3c723 100644
--- a/compiler/ucd_table.go
+++ b/compiler/ucd_table.go
@@ -28,6 +28,22 @@ var compositGeneralCategories = map[string][]string{
var propertyNameAbbs = map[string]string{
"generalcategory": "gc",
"gc": "gc",
+ "whitespace": "wspace",
+ "wspace": "wspace",
+ "space": "wspace",
+}
+
+// https://www.unicode.org/reports/tr44/#Type_Key_Table
+// https://www.unicode.org/reports/tr44/#Binary_Values_Table
+var binaryValues = map[string]bool{
+ "yes": true,
+ "y": true,
+ "true": true,
+ "t": true,
+ "no": false,
+ "n": false,
+ "false": false,
+ "f": false,
}
// https://www.unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt
@@ -4087,3 +4103,18 @@ var generalCategoryCodePoints = map[string][]*ucd.CodePointRange{
&ucd.CodePointRange{From: rune(12288), To: rune(12288)},
},
}
+
+// https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
+var whiteSpaceCodePoints = []*ucd.CodePointRange{
+ &ucd.CodePointRange{From: rune(9), To: rune(13)},
+ &ucd.CodePointRange{From: rune(32), To: rune(32)},
+ &ucd.CodePointRange{From: rune(133), To: rune(133)},
+ &ucd.CodePointRange{From: rune(160), To: rune(160)},
+ &ucd.CodePointRange{From: rune(5760), To: rune(5760)},
+ &ucd.CodePointRange{From: rune(8192), To: rune(8202)},
+ &ucd.CodePointRange{From: rune(8232), To: rune(8232)},
+ &ucd.CodePointRange{From: rune(8233), To: rune(8233)},
+ &ucd.CodePointRange{From: rune(8239), To: rune(8239)},
+ &ucd.CodePointRange{From: rune(8287), To: rune(8287)},
+ &ucd.CodePointRange{From: rune(12288), To: rune(12288)},
+}
diff --git a/compiler/ucd_table.go.tmpl b/compiler/ucd_table.go.tmpl
index a364191..80142c7 100644
--- a/compiler/ucd_table.go.tmpl
+++ b/compiler/ucd_table.go.tmpl
@@ -28,6 +28,22 @@ var compositGeneralCategories = map[string][]string{
var propertyNameAbbs = map[string]string{
"generalcategory": "gc",
"gc": "gc",
+ "whitespace": "wspace",
+ "wspace": "wspace",
+ "space": "wspace",
+}
+
+// https://www.unicode.org/reports/tr44/#Type_Key_Table
+// https://www.unicode.org/reports/tr44/#Binary_Values_Table
+var binaryValues = map[string]bool{
+ "yes": true,
+ "y": true,
+ "true": true,
+ "t": true,
+ "no": false,
+ "n": false,
+ "false": false,
+ "f": false,
}
// https://www.unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt
@@ -41,3 +57,8 @@ var generalCategoryCodePoints = map[string][]*ucd.CodePointRange{ {{ range $prop
&ucd.CodePointRange{From: rune({{ .From }}), To: rune({{ .To }})},{{ end }}
},{{ end }}
}
+
+// https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
+var whiteSpaceCodePoints = []*ucd.CodePointRange{ {{ range .PropList.WhiteSpace }}
+ &ucd.CodePointRange{From: rune({{ .From }}), To: rune({{ .To }})},{{ end }}
+}