aboutsummaryrefslogtreecommitdiff
path: root/compiler/ucd_table.go
diff options
context:
space:
mode:
author: Ryo Nihei <nihei.dev@gmail.com> 2021-11-25 21:18:34 +0900
committer: Ryo Nihei <nihei.dev@gmail.com> 2021-11-25 21:18:34 +0900
commit: 6ebbc8f9829bf0f3127367769c662d1a8f881a2d (patch)
tree: e45af1104e3ce736134353c1805fe0c91d04998a /compiler/ucd_table.go
parentSupport White_Space property (Meet RL1.2 of UTS #18 partially) (diff)
downloadtre-6ebbc8f9829bf0f3127367769c662d1a8f881a2d.tar.gz
tre-6ebbc8f9829bf0f3127367769c662d1a8f881a2d.tar.xz
Support Lowercase and Uppercase property (Meet RL1.2 of UTS #18 partially)
Diffstat (limited to 'compiler/ucd_table.go')
-rw-r--r-- compiler/ucd_table.go 47
1 files changed, 47 insertions, 0 deletions
diff --git a/compiler/ucd_table.go b/compiler/ucd_table.go
index 4c3c723..545a9c8 100644
--- a/compiler/ucd_table.go
+++ b/compiler/ucd_table.go
@@ -24,10 +24,24 @@ var compositGeneralCategories = map[string][]string{
"c": {"cc", "cf", "cs", "co", "cn"},
}
+// derivedCoreProperties lists, per abbreviated property name, the \p{...} expressions that property is generated from; see https://www.unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt
+var derivedCoreProperties = map[string][]string{ // NOTE(review): presumably the consumer unions the expressions of one entry — confirm at the call site
+ "lower": {`\p{Ll}`, `\p{Other_Lowercase=yes}`}, // Lowercase is generated from Ll + Other_Lowercase
+ "upper": {`\p{Lu}`, `\p{Other_Uppercase=yes}`}, // Uppercase is generated from Lu + Other_Uppercase
+}
+
// https://www.unicode.org/Public/13.0.0/ucd/PropertyAliases.txt
var propertyNameAbbs = map[string]string{
"generalcategory": "gc",
"gc": "gc",
+ "lowercase": "lower",
+ "lower": "lower",
+ "uppercase": "upper",
+ "upper": "upper",
+ "otherlowercase": "olower",
+ "olower": "olower",
+ "otheruppercase": "oupper",
+ "oupper": "oupper",
"whitespace": "wspace",
"wspace": "wspace",
"space": "wspace",
@@ -4105,6 +4119,39 @@ var generalCategoryCodePoints = map[string][]*ucd.CodePointRange{
}
// https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
+var otherLowercaseCodePoints = []*ucd.CodePointRange{ // Other_Lowercase ranges as listed in PropList.txt (Unicode 13.0.0); From == To marks a single code point, so bounds read as inclusive
+ &ucd.CodePointRange{From: rune(170), To: rune(170)}, // U+00AA
+ &ucd.CodePointRange{From: rune(186), To: rune(186)}, // U+00BA
+ &ucd.CodePointRange{From: rune(688), To: rune(696)}, // U+02B0..U+02B8
+ &ucd.CodePointRange{From: rune(704), To: rune(705)}, // U+02C0..U+02C1
+ &ucd.CodePointRange{From: rune(736), To: rune(740)}, // U+02E0..U+02E4
+ &ucd.CodePointRange{From: rune(837), To: rune(837)}, // U+0345
+ &ucd.CodePointRange{From: rune(890), To: rune(890)}, // U+037A
+ &ucd.CodePointRange{From: rune(7468), To: rune(7530)}, // U+1D2C..U+1D6A
+ &ucd.CodePointRange{From: rune(7544), To: rune(7544)}, // U+1D78
+ &ucd.CodePointRange{From: rune(7579), To: rune(7615)}, // U+1D9B..U+1DBF
+ &ucd.CodePointRange{From: rune(8305), To: rune(8305)}, // U+2071
+ &ucd.CodePointRange{From: rune(8319), To: rune(8319)}, // U+207F
+ &ucd.CodePointRange{From: rune(8336), To: rune(8348)}, // U+2090..U+209C
+ &ucd.CodePointRange{From: rune(8560), To: rune(8575)}, // U+2170..U+217F
+ &ucd.CodePointRange{From: rune(9424), To: rune(9449)}, // U+24D0..U+24E9
+ &ucd.CodePointRange{From: rune(11388), To: rune(11389)}, // U+2C7C..U+2C7D
+ &ucd.CodePointRange{From: rune(42652), To: rune(42653)}, // U+A69C..U+A69D
+ &ucd.CodePointRange{From: rune(42864), To: rune(42864)}, // U+A770
+ &ucd.CodePointRange{From: rune(43000), To: rune(43001)}, // U+A7F8..U+A7F9
+ &ucd.CodePointRange{From: rune(43868), To: rune(43871)}, // U+AB5C..U+AB5F
+}
+
+// otherUppercaseCodePoints holds the Other_Uppercase code point ranges listed in https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
+var otherUppercaseCodePoints = []*ucd.CodePointRange{ // values are decimal code points; the hex form is given per line
+ &ucd.CodePointRange{From: rune(8544), To: rune(8559)}, // U+2160..U+216F
+ &ucd.CodePointRange{From: rune(9398), To: rune(9423)}, // U+24B6..U+24CF
+ &ucd.CodePointRange{From: rune(127280), To: rune(127305)}, // U+1F130..U+1F149
+ &ucd.CodePointRange{From: rune(127312), To: rune(127337)}, // U+1F150..U+1F169
+ &ucd.CodePointRange{From: rune(127344), To: rune(127369)}, // U+1F170..U+1F189
+}
+
+// https://www.unicode.org/Public/13.0.0/ucd/PropList.txt
var whiteSpaceCodePoints = []*ucd.CodePointRange{
&ucd.CodePointRange{From: rune(9), To: rune(13)},
&ucd.CodePointRange{From: rune(32), To: rune(32)},