aboutsummaryrefslogtreecommitdiff
path: root/compiler/ucd_table.go.tmpl
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-04-30 01:54:02 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-04-30 01:54:02 +0900
commit43fdbf94ad87ea91a173c72688cad70a0a5f1ab4 (patch)
tree655f651e39f13b5e415445d1ef24f4ecb7511041 /compiler/ucd_table.go.tmpl
parentAdd code point expression (Meet RL1.1 of UTS #18) (diff)
downloadtre-43fdbf94ad87ea91a173c72688cad70a0a5f1ab4.tar.gz
tre-43fdbf94ad87ea91a173c72688cad70a0a5f1ab4.tar.xz
Add character property expression (Meet RL1.2 of UTS #18 partially)
\p{property name=property value} matches a character that has the given property. When the property name is General_Category, it can be omitted; that is, \p{Letter} is equivalent to \p{General_Category=Letter}. Currently, only General_Category is supported, so this feature only partially meets RL1.2 of UTS #18. RL1.2 Properties: https://unicode.org/reports/tr18/#RL1.2
Diffstat (limited to 'compiler/ucd_table.go.tmpl')
-rw-r--r--compiler/ucd_table.go.tmpl43
1 files changed, 43 insertions, 0 deletions
diff --git a/compiler/ucd_table.go.tmpl b/compiler/ucd_table.go.tmpl
new file mode 100644
index 0000000..a364191
--- /dev/null
+++ b/compiler/ucd_table.go.tmpl
@@ -0,0 +1,43 @@
+// Code generated by {{ .GeneratorName }}; DO NOT EDIT.
+
+package compiler
+
+import "github.com/nihei9/maleeni/ucd"
+
+// compositGeneralCategories maps the (lowercased) abbreviation of each grouping General_Category value to the abbreviations of the values it comprises. See https://www.unicode.org/reports/tr44/#GC_Values_Table
+var compositGeneralCategories = map[string][]string{
+ // Cased_Letter: Lu | Ll | Lt
+ "lc": {"lu", "ll", "lt"},
+ // Letter: Lu | Ll | Lt | Lm | Lo
+ "l": {"lu", "ll", "lt", "lm", "lo"},
+ // Mark: Mn | Mc | Me (the first member is Nonspacing_Mark, "mn"; no "mm" value exists)
+ "m": {"mn", "mc", "me"},
+ // Number: Nd | Nl | No
+ "n": {"nd", "nl", "no"},
+ // Punctuation: Pc | Pd | Ps | Pe | Pi | Pf | Po
+ "p": {"pc", "pd", "ps", "pi", "pe", "pf", "po"},
+ // Symbol: Sm | Sc | Sk | So
+ "s": {"sm", "sc", "sk", "so"},
+ // Separator: Zs | Zl | Zp
+ "z": {"zs", "zl", "zp"},
+ // Other: Cc | Cf | Cs | Co | Cn
+ "c": {"cc", "cf", "cs", "co", "cn"},
+}
+
+// propertyNameAbbs maps a property name, given either as its long form written in lowercase without underscores ("generalcategory") or as its abbreviation ("gc"), to the abbreviation. Only General_Category is listed. Source: https://www.unicode.org/Public/13.0.0/ucd/PropertyAliases.txt
+var propertyNameAbbs = map[string]string{
+ "generalcategory": "gc",
+ "gc": "gc",
+}
+
+// generalCategoryValueAbbs maps a General_Category value name ($long) to its abbreviation ($abb); the template emits one entry per element of .PropertyValueAliases.GeneralCategory. Source: https://www.unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt
+var generalCategoryValueAbbs = map[string]string{ {{ range $long, $abb := .PropertyValueAliases.GeneralCategory }}
+ "{{ $long }}": "{{ $abb }}",{{ end }}
+}
+
+// generalCategoryCodePoints maps a General_Category value ($propName) to its code point ranges; the template emits one key per element of .UnicodeData.GeneralCategory and one ucd.CodePointRange per range under it. Source: https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt
+var generalCategoryCodePoints = map[string][]*ucd.CodePointRange{ {{ range $propName, $codePoints := .UnicodeData.GeneralCategory }}
+ "{{ $propName }}": { {{ range $codePoints }}
+ &ucd.CodePointRange{From: rune({{ .From }}), To: rune({{ .To }})},{{ end }}
+ },{{ end }}
+}