aboutsummaryrefslogtreecommitdiff
path: root/compiler/ucd.go
blob: 3c0bee17b17cf9e1e0c32fbd096f9c9bd8c60567 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
//go:generate go run ../cmd/generator/main.go
//go:generate go fmt ucd_table.go

package compiler

import (
	"fmt"
	"strings"

	"github.com/nihei9/maleeni/ucd"
)

// normalizeCharacterProperty rewrites a property that has an entry in
// derivedCoreProperties into a bracket expression built from that entry.
//
// propName is normalized with ucd.NormalizeSymbolicValue and resolved through
// propertyNameAbbs; an unknown name is an error. When the resolved property has
// no entry in derivedCoreProperties, the empty string and a nil error are
// returned and propVal is not inspected. Otherwise propVal must be a key of
// binaryValues (after normalization): a true value yields "[" + entries + "]",
// a false value yields "[^" + entries + "]".
func normalizeCharacterProperty(propName, propVal string) (string, error) {
	abbr, found := propertyNameAbbs[ucd.NormalizeSymbolicValue(propName)]
	if !found {
		return "", fmt.Errorf("unsupported character property name: %v", propName)
	}

	components, derived := derivedCoreProperties[abbr]
	if !derived {
		// Nothing to rewrite for this property.
		return "", nil
	}

	positive, known := binaryValues[ucd.NormalizeSymbolicValue(propVal)]
	if !known {
		return "", fmt.Errorf("unsupported character property value: %v", propVal)
	}

	// A negative property value produces a negated bracket expression.
	opening := "[^"
	if positive {
		opening = "["
	}

	var expr strings.Builder
	expr.WriteString(opening)
	for _, component := range components {
		fmt.Fprint(&expr, component)
	}
	expr.WriteString("]")

	return expr.String(), nil
}

// findCodePointRanges looks up the code point ranges registered for the
// character property propName with the value propVal.
//
// Both arguments are normalized with ucd.NormalizeSymbolicValue before lookup,
// and the property name is resolved through propertyNameAbbs.
//
//   - "gc": the value is resolved through generalCategoryValueAbbs and expanded
//     through compositGeneralCategories when it has an entry there; the ranges
//     of every resulting category in generalCategoryCodePoints are concatenated.
//     The returned flag is always false.
//   - "olower", "oupper", "wspace": the value must be a key of binaryValues. The
//     property's full range table is returned, and the flag is true when the
//     value is negative.
//
// NOTE(review): the flag presumably tells the caller to use the complement of
// the returned ranges; confirm against the call site.
//
// An error is returned when the property name or value is not supported, or
// when the resolved property has no branch in this function.
func findCodePointRanges(propName, propVal string) ([]*ucd.CodePointRange, bool, error) {
	name, ok := propertyNameAbbs[ucd.NormalizeSymbolicValue(propName)]
	if !ok {
		return nil, false, fmt.Errorf("unsupported character property name: %v", propName)
	}

	switch name {
	case "gc":
		val, ok := generalCategoryValueAbbs[ucd.NormalizeSymbolicValue(propVal)]
		if !ok {
			return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
		}
		// A value without an entry in compositGeneralCategories stands for itself.
		vals, ok := compositGeneralCategories[val]
		if !ok {
			vals = []string{val}
		}
		var ranges []*ucd.CodePointRange
		for _, v := range vals {
			rs, ok := generalCategoryCodePoints[v]
			if !ok {
				return nil, false, fmt.Errorf("invalid value of the General_Category property: %v", v)
			}
			ranges = append(ranges, rs...)
		}
		return ranges, false, nil
	case "olower":
		return binaryPropertyRanges(propVal, otherLowercaseCodePoints)
	case "oupper":
		return binaryPropertyRanges(propVal, otherUppercaseCodePoints)
	case "wspace":
		return binaryPropertyRanges(propVal, whiteSpaceCodePoints)
	}

	// Reaching this point is a bug: every property registered in
	// `propertyNameAbbs` must be handled by the switch above.
	return nil, false, fmt.Errorf("character property '%v' is unavailable", propName)
}

// binaryPropertyRanges resolves propVal through binaryValues and returns ranges
// unchanged together with a flag that is true when the value is negative. An
// unknown value yields an error.
func binaryPropertyRanges(propVal string, ranges []*ucd.CodePointRange) ([]*ucd.CodePointRange, bool, error) {
	yes, ok := binaryValues[ucd.NormalizeSymbolicValue(propVal)]
	if !ok {
		return nil, false, fmt.Errorf("unsupported character property value: %v", propVal)
	}
	return ranges, !yes, nil
}