aboutsummaryrefslogtreecommitdiff
path: root/ucd/api.go
diff options
context:
space:
mode:
Diffstat (limited to 'ucd/api.go')
-rw-r--r--ucd/api.go180
1 files changed, 180 insertions, 0 deletions
diff --git a/ucd/api.go b/ucd/api.go
new file mode 100644
index 0000000..8265d54
--- /dev/null
+++ b/ucd/api.go
@@ -0,0 +1,180 @@
+//go:generate go run ../cmd/ucdgen/main.go
+//go:generate go fmt codepoint.go
+
+package ucd
+
+import (
+ "fmt"
+ "strings"
+)
+
+const (
+ // codePointMin and codePointMax are the inclusive bounds of the Unicode codespace.
+ // The Unicode Standard 13.0, section 3.4 "Characters and Encoding", definition D9, reads:
+ // "Unicode codespace: A range of integers from 0 to 10FFFF (hexadecimal)." See https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf
+ codePointMin = 0x0
+ codePointMax = 0x10FFFF
+)
+
+// NormalizeCharacterProperty expands a property that has an entry in derivedCoreProperties into
+// a bracketed concatenation of that entry's elements.
+//
+// An empty propName is treated as "gc". The name is resolved through propertyNameAbbs after
+// normalizeSymbolicValue; an unknown name is reported as an error. If the resolved name has no
+// entry in derivedCoreProperties, the function returns "" and a nil error. Otherwise propVal
+// (after normalizeSymbolicValue) must be a key of binaryValues: a true value produces
+// "[" + elements + "]" and a false value produces "[^" + elements + "]".
+func NormalizeCharacterProperty(propName, propVal string) (string, error) {
+	requested := propName
+	if requested == "" {
+		requested = "gc"
+	}
+
+	abbr, known := propertyNameAbbs[normalizeSymbolicValue(requested)]
+	if !known {
+		return "", fmt.Errorf("unsupported character property name: %v", requested)
+	}
+
+	components, derived := derivedCoreProperties[abbr]
+	if !derived {
+		// Not an error: the empty string tells the caller there is nothing to expand.
+		return "", nil
+	}
+
+	positive, valid := binaryValues[normalizeSymbolicValue(propVal)]
+	if !valid {
+		return "", fmt.Errorf("unsupported character property value: %v", propVal)
+	}
+
+	// A false value negates the whole bracketed list.
+	opening := "[^"
+	if positive {
+		opening = "["
+	}
+
+	var expr strings.Builder
+	expr.WriteString(opening)
+	for _, component := range components {
+		fmt.Fprint(&expr, component)
+	}
+	expr.WriteString("]")
+
+	return expr.String(), nil
+}
+
+// IsContributoryProperty reports whether propName equals one of the entries of
+// contributoryProperties. The name is compared verbatim (it is not passed through
+// normalizeSymbolicValue), and an empty name always yields false.
+func IsContributoryProperty(propName string) bool {
+	if propName != "" {
+		for _, candidate := range contributoryProperties {
+			if candidate == propName {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// FindCodePointRanges looks up the code point ranges associated with the character property
+// propName and the value propVal.
+//
+// An empty propName is treated as "gc". The name is resolved through propertyNameAbbs, and the
+// value through the table belonging to the resolved property; both are passed through
+// normalizeSymbolicValue first. An unknown name or value is reported as an error.
+//
+// The boolean result tells the caller how to interpret the returned ranges. When it is false the
+// ranges are the ones registered for the requested value. When it is true the ranges describe
+// the opposite set and must be inverted by the caller: this is what is returned for a false
+// value of a binary property (same ranges as the true value, flag set) and for the default value
+// of "gc" and "sc" (everything outside the default range plus every explicitly listed range).
+//
+// On error the ranges are nil and the boolean is false.
+func FindCodePointRanges(propName, propVal string) ([]*CodePointRange, bool, error) {
+	if propName == "" {
+		propName = "gc"
+	}
+
+	name, ok := propertyNameAbbs[normalizeSymbolicValue(propName)]
+	if !ok {
+		return nil, false, fmt.Errorf("unsupported character property name: %v", propName)
+	}
+	switch name {
+	case "gc":
+		val, ok := generalCategoryValueAbbs[normalizeSymbolicValue(propVal)]
+		if !ok {
+			return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
+		}
+		if val == generalCategoryValueAbbs[normalizeSymbolicValue(generalCategoryDefaultValue)] {
+			// The default value has no table of its own. Collect everything that is NOT the
+			// default (the parts of the codespace outside the default range plus every listed
+			// category) and ask the caller to invert it.
+			var allCPs []*CodePointRange
+			if generalCategoryDefaultRange.From > codePointMin {
+				allCPs = append(allCPs, &CodePointRange{
+					From: codePointMin,
+					To:   generalCategoryDefaultRange.From - 1,
+				})
+			}
+			if generalCategoryDefaultRange.To < codePointMax {
+				allCPs = append(allCPs, &CodePointRange{
+					From: generalCategoryDefaultRange.To + 1,
+					To:   codePointMax,
+				})
+			}
+			for _, cp := range generalCategoryCodePoints {
+				allCPs = append(allCPs, cp...)
+			}
+			return allCPs, true, nil
+		}
+		// A value listed in compositGeneralCategories stands for several categories; any other
+		// value stands for itself.
+		vals, ok := compositGeneralCategories[val]
+		if !ok {
+			vals = []string{val}
+		}
+		var ranges []*CodePointRange
+		for _, v := range vals {
+			rs, ok := generalCategoryCodePoints[v]
+			if !ok {
+				return nil, false, fmt.Errorf("invalid value of the General_Category property: %v", v)
+			}
+			ranges = append(ranges, rs...)
+		}
+		return ranges, false, nil
+	case "sc":
+		val, ok := scriptValueAbbs[normalizeSymbolicValue(propVal)]
+		if !ok {
+			return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
+		}
+		if val == scriptValueAbbs[normalizeSymbolicValue(scriptDefaultValue)] {
+			// Same approach as the General_Category default: return the non-default code points
+			// and set the inversion flag.
+			var allCPs []*CodePointRange
+			if scriptDefaultRange.From > codePointMin {
+				allCPs = append(allCPs, &CodePointRange{
+					From: codePointMin,
+					To:   scriptDefaultRange.From - 1,
+				})
+			}
+			if scriptDefaultRange.To < codePointMax {
+				allCPs = append(allCPs, &CodePointRange{
+					From: scriptDefaultRange.To + 1,
+					To:   codePointMax,
+				})
+			}
+			for _, cp := range scriptCodepoints {
+				allCPs = append(allCPs, cp...)
+			}
+			return allCPs, true, nil
+		}
+		// NOTE: unlike the "gc" branch, a value without an entry in scriptCodepoints yields nil
+		// ranges and no error.
+		return scriptCodepoints[val], false, nil
+	case "oalpha":
+		return binaryPropertyCodePointRanges(otherAlphabeticCodePoints, propVal)
+	case "olower":
+		return binaryPropertyCodePointRanges(otherLowercaseCodePoints, propVal)
+	case "oupper":
+		return binaryPropertyCodePointRanges(otherUppercaseCodePoints, propVal)
+	case "wspace":
+		return binaryPropertyCodePointRanges(whiteSpaceCodePoints, propVal)
+	}
+
+	// If the process reaches this code, it's a bug. We must handle all of the properties registered with
+	// the `propertyNameAbbs`.
+	return nil, false, fmt.Errorf("character property '%v' is unavailable", propName)
+}
+
+// binaryPropertyCodePointRanges resolves propVal through binaryValues and returns ranges
+// unchanged together with the inversion flag: false for a true value, true for a false value.
+// A propVal that is not a key of binaryValues (after normalizeSymbolicValue) is an error.
+func binaryPropertyCodePointRanges(ranges []*CodePointRange, propVal string) ([]*CodePointRange, bool, error) {
+	yes, ok := binaryValues[normalizeSymbolicValue(propVal)]
+	if !ok {
+		return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
+	}
+	if yes {
+		return ranges, false, nil
+	}
+	return ranges, true, nil
+}