diff options
Diffstat (limited to 'ucd/api.go')
-rw-r--r-- | ucd/api.go | 180 |
1 files changed, 180 insertions, 0 deletions
diff --git a/ucd/api.go b/ucd/api.go new file mode 100644 index 0000000..8265d54 --- /dev/null +++ b/ucd/api.go @@ -0,0 +1,180 @@ +//go:generate go run ../cmd/ucdgen/main.go +//go:generate go fmt codepoint.go + +package ucd + +import ( + "fmt" + "strings" +) + +const ( + // https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf + // 3.4 Characters and Encoding + // > D9 Unicode codespace: A range of integers from 0 to 10FFFF16. + codePointMin = 0x0 + codePointMax = 0x10FFFF +) + +func NormalizeCharacterProperty(propName, propVal string) (string, error) { + if propName == "" { + propName = "gc" + } + + name, ok := propertyNameAbbs[normalizeSymbolicValue(propName)] + if !ok { + return "", fmt.Errorf("unsupported character property name: %v", propName) + } + props, ok := derivedCoreProperties[name] + if !ok { + return "", nil + } + var b strings.Builder + yes, ok := binaryValues[normalizeSymbolicValue(propVal)] + if !ok { + return "", fmt.Errorf("unsupported character property value: %v", propVal) + } + if yes { + fmt.Fprint(&b, "[") + } else { + fmt.Fprint(&b, "[^") + } + for _, prop := range props { + fmt.Fprint(&b, prop) + } + fmt.Fprint(&b, "]") + + return b.String(), nil +} + +func IsContributoryProperty(propName string) bool { + if propName == "" { + return false + } + + for _, p := range contributoryProperties { + if propName == p { + return true + } + } + return false +} + +func FindCodePointRanges(propName, propVal string) ([]*CodePointRange, bool, error) { + if propName == "" { + propName = "gc" + } + + name, ok := propertyNameAbbs[normalizeSymbolicValue(propName)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property name: %v", propName) + } + switch name { + case "gc": + val, ok := generalCategoryValueAbbs[normalizeSymbolicValue(propVal)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property value: %v", propVal) + } + if val == generalCategoryValueAbbs[normalizeSymbolicValue(generalCategoryDefaultValue)] { + var allCPs []*CodePointRange + if generalCategoryDefaultRange.From > codePointMin { + allCPs = append(allCPs, &CodePointRange{ + From: codePointMin, + To: generalCategoryDefaultRange.From - 1, + }) + } + if generalCategoryDefaultRange.To < codePointMax { + allCPs = append(allCPs, &CodePointRange{ + From: generalCategoryDefaultRange.To + 1, + To: codePointMax, + }) + } + for _, cp := range generalCategoryCodePoints { + allCPs = append(allCPs, cp...) + } + return allCPs, true, nil + } + vals, ok := compositGeneralCategories[val] + if !ok { + vals = []string{val} + } + var ranges []*CodePointRange + for _, v := range vals { + rs, ok := generalCategoryCodePoints[v] + if !ok { + return nil, false, fmt.Errorf("invalid value of the General_Category property: %v", v) + } + ranges = append(ranges, rs...) + } + return ranges, false, nil + case "sc": + val, ok := scriptValueAbbs[normalizeSymbolicValue(propVal)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property value: %v", propVal) + } + if val == scriptValueAbbs[normalizeSymbolicValue(scriptDefaultValue)] { + var allCPs []*CodePointRange + if scriptDefaultRange.From > codePointMin { + allCPs = append(allCPs, &CodePointRange{ + From: codePointMin, + To: scriptDefaultRange.From - 1, + }) + } + if scriptDefaultRange.To < codePointMax { + allCPs = append(allCPs, &CodePointRange{ + From: scriptDefaultRange.To + 1, + To: codePointMax, + }) + } + for _, cp := range scriptCodepoints { + allCPs = append(allCPs, cp...) + } + return allCPs, true, nil + } + return scriptCodepoints[val], false, nil + case "oalpha": + yes, ok := binaryValues[normalizeSymbolicValue(propVal)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property value: %v", propVal) + } + if yes { + return otherAlphabeticCodePoints, false, nil + } else { + return otherAlphabeticCodePoints, true, nil + } + case "olower": + yes, ok := binaryValues[normalizeSymbolicValue(propVal)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property value: %v", propVal) + } + if yes { + return otherLowercaseCodePoints, false, nil + } else { + return otherLowercaseCodePoints, true, nil + } + case "oupper": + yes, ok := binaryValues[normalizeSymbolicValue(propVal)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property value: %v", propVal) + } + if yes { + return otherUppercaseCodePoints, false, nil + } else { + return otherUppercaseCodePoints, true, nil + } + case "wspace": + yes, ok := binaryValues[normalizeSymbolicValue(propVal)] + if !ok { + return nil, false, fmt.Errorf("unsupported character property value: %v", propVal) + } + if yes { + return whiteSpaceCodePoints, false, nil + } else { + return whiteSpaceCodePoints, true, nil + } + } + + // If the process reaches this code, it's a bug. We must handle all of the properties registered with + // the `propertyNameAbbs`. + return nil, false, fmt.Errorf("character property '%v' is unavailable", propName) +} |