From 43fdbf94ad87ea91a173c72688cad70a0a5f1ab4 Mon Sep 17 00:00:00 2001
From: Ryo Nihei
Date: Fri, 30 Apr 2021 01:54:02 +0900
Subject: Add character property expression (Meet RL1.2 of UTS #18 partially)

\p{property name=property value} matches a character has the property.
When the property name is General_Category, it can be omitted.
That is, \p{Letter} equals \p{General_Category=Letter}.

Currently, only General_Category is supported.

This feature meets RL1.2 of UTS #18 partially.

RL1.2 Properties: https://unicode.org/reports/tr18/#RL1.2
---
 compiler/ucd_table.go.tmpl | 60 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 compiler/ucd_table.go.tmpl

(limited to 'compiler/ucd_table.go.tmpl')

diff --git a/compiler/ucd_table.go.tmpl b/compiler/ucd_table.go.tmpl
new file mode 100644
index 0000000..a364191
--- /dev/null
+++ b/compiler/ucd_table.go.tmpl
@@ -0,0 +1,60 @@
+// Code generated by {{ .GeneratorName }}; DO NOT EDIT.
+
+package compiler
+
+import "github.com/nihei9/maleeni/ucd"
+
+// compositGeneralCategories maps the abbreviation of each grouped
+// General_Category value to the abbreviations of the values it combines.
+// Every key and value is a lowercased abbreviation from the table below.
+//
+// https://www.unicode.org/reports/tr44/#GC_Values_Table
+var compositGeneralCategories = map[string][]string{
+	// Cased_Letter
+	"lc": {"lu", "ll", "lt"},
+	// Letter
+	"l": {"lu", "ll", "lt", "lm", "lo"},
+	// Mark (Mn | Mc | Me)
+	"m": {"mn", "mc", "me"},
+	// Number
+	"n": {"nd", "nl", "no"},
+	// Punctuation
+	"p": {"pc", "pd", "ps", "pi", "pe", "pf", "po"},
+	// Symbol
+	"s": {"sm", "sc", "sk", "so"},
+	// Separator
+	"z": {"zs", "zl", "zp"},
+	// Other
+	"c": {"cc", "cf", "cs", "co", "cn"},
+}
+
+// propertyNameAbbs maps a property name to its abbreviation. Only
+// General_Category is listed, under both its long and short names.
+// NOTE(review): keys look normalized (lowercase, no underscores); confirm
+// that callers normalize names the same way before looking them up.
+//
+// https://www.unicode.org/Public/13.0.0/ucd/PropertyAliases.txt
+var propertyNameAbbs = map[string]string{
+	"generalcategory": "gc",
+	"gc":              "gc",
+}
+
+// generalCategoryValueAbbs maps a General_Category value name to its
+// abbreviation. The entries are filled in by the generator from
+// .PropertyValueAliases.GeneralCategory.
+//
+// https://www.unicode.org/Public/13.0.0/ucd/PropertyValueAliases.txt
+var generalCategoryValueAbbs = map[string]string{ {{ range $long, $abb := .PropertyValueAliases.GeneralCategory }}
+	"{{ $long }}": "{{ $abb }}",{{ end }}
+}
+
+// generalCategoryCodePoints maps each key of .UnicodeData.GeneralCategory
+// (presumably a General_Category value abbreviation; confirm against the
+// generator) to the code point ranges the generator recorded for it.
+//
+// https://www.unicode.org/Public/13.0.0/ucd/UnicodeData.txt
+var generalCategoryCodePoints = map[string][]*ucd.CodePointRange{ {{ range $propName, $codePoints := .UnicodeData.GeneralCategory }}
+	"{{ $propName }}": { {{ range $codePoints }}
+		&ucd.CodePointRange{From: rune({{ .From }}), To: rune({{ .To }})},{{ end }}
+	},{{ end }}
+}
-- 
cgit v1.2.3