aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/parser.go31
-rw-r--r--compiler/parser_test.go32
-rw-r--r--ucd/api.go13
-rw-r--r--ucd/property.go14
4 files changed, 89 insertions, 1 deletions
diff --git a/compiler/parser.go b/compiler/parser.go
index 980e65c..89c8301 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -251,14 +251,38 @@ type parser struct {
lastTok *token
incomplete bool
errMsgDetails string
+
+ // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that
+ // appear in property expressions.
+ //
+ // By default, contributory properties are not exposed: the parser follows [UAX #44 5.13 Property APIs], so
+ // users cannot refer to them in patterns. For instance, \p{Other_Alphabetic} is invalid.
+ //
+ // isContributoryPropertyExposed is set to true on the nested parser that parseCharProp creates for the
+ // pattern returned by ucd.NormalizeCharacterProperty. Derived properties are defined in terms of other
+ // properties, which may include contributory ones, so that nested parser must be able to interpret them.
+ //
+ // [UAX #44 5.13 Property APIs] says:
+ // > The following subtypes of Unicode character properties should generally not be exposed in APIs,
+ // > except in limited circumstances. They may not be useful, particularly in public API collections,
+ // > and may instead prove misleading to the users of such API collections.
+ // > * Contributory properties are not recommended for public APIs.
+ // > ...
+ // https://unicode.org/reports/tr44/#Property_APIs
+ isContributoryPropertyExposed bool
}
func newParser(src io.Reader) *parser {
return &parser{
- lex: newLexer(src),
+ lex: newLexer(src),
+ isContributoryPropertyExposed: false,
}
}
+// exposeContributoryProperty sets isContributoryPropertyExposed, after which parseCharProp no longer
+// rejects contributory property names with synErrCharPropUnsupported. parseCharProp calls it on the nested
+// parser it builds for the pattern returned by ucd.NormalizeCharacterProperty.
+func (p *parser) exposeContributoryProperty() {
+ p.isContributoryPropertyExposed = true
+}
+
func (p *parser) parse() (ast astNode, retErr error) {
defer func() {
err := recover()
@@ -548,6 +572,10 @@ func (p *parser) parseCharProp() astNode {
propName = ""
propVal = sym1
}
+ if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) {
+ p.errMsgDetails = propName
+ raiseSyntaxError(synErrCharPropUnsupported)
+ }
pat, err := ucd.NormalizeCharacterProperty(propName, propVal)
if err != nil {
p.errMsgDetails = fmt.Sprintf("%v", err)
@@ -555,6 +583,7 @@ func (p *parser) parseCharProp() astNode {
}
if pat != "" {
p := newParser(bytes.NewReader([]byte(pat)))
+ p.exposeContributoryProperty()
ast, err := p.parse()
if err != nil {
panic(err)
diff --git a/compiler/parser_test.go b/compiler/parser_test.go
index 7c33fb4..e4a6fe2 100644
--- a/compiler/parser_test.go
+++ b/compiler/parser_test.go
@@ -6,6 +6,7 @@ import (
"testing"
"github.com/nihei9/maleeni/spec"
+ "github.com/nihei9/maleeni/ucd"
)
func symPos(n uint16) symbolPosition {
@@ -1241,6 +1242,37 @@ func TestParse(t *testing.T) {
}
}
+// TestParse_ContributoryPropertyIsNotExposed checks that each name returned by ucd.ContributoryProperties is
+// rejected when it appears in a `\p{<name>=yes}` pattern passed to parse: the call must fail with a
+// *ParseErrors whose first cause is synErrCharPropUnsupported, and it must not return an AST.
+func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) {
+	for _, cProp := range ucd.ContributoryProperties() {
+		// cProp is already a string, so it is used as the subtest name as is.
+		t.Run(cProp, func(t *testing.T) {
+			ast, _, err := parse([]*patternEntry{
+				{
+					id:      spec.LexModeKindIDMin,
+					pattern: []byte(fmt.Sprintf(`\p{%v=yes}`, cProp)),
+				},
+			}, nil)
+			if err == nil {
+				t.Fatalf("expected syntax error; got: nil")
+			}
+			parseErrs, ok := err.(*ParseErrors)
+			if !ok {
+				t.Fatalf("expected ParseErrors; got: %v (type: %T)", err, err)
+			}
+			// Guard the index below so that an empty error list fails this subtest instead of panicking.
+			if len(parseErrs.Errors) == 0 {
+				t.Fatalf("expected at least one parse error; got none")
+			}
+			parseErr := parseErrs.Errors[0].Cause
+			synErr, ok := parseErr.(*SyntaxError)
+			if !ok {
+				t.Fatalf("expected SyntaxError; got: %v (type: %T)", parseErr, parseErr)
+			}
+			if synErr != synErrCharPropUnsupported {
+				t.Fatalf("unexpected syntax error; want: %v, got: %v", synErrCharPropUnsupported, synErr)
+			}
+			if ast != nil {
+				t.Fatalf("ast is not nil")
+			}
+		})
+	}
+}
+
func TestParse_FollowAndSymbolTable(t *testing.T) {
root, symTab, err := parse([]*patternEntry{
{
diff --git a/ucd/api.go b/ucd/api.go
index 6451f39..0212e01 100644
--- a/ucd/api.go
+++ b/ucd/api.go
@@ -39,6 +39,19 @@ func NormalizeCharacterProperty(propName, propVal string) (string, error) {
return b.String(), nil
}
+// IsContributoryProperty reports whether propName is one of the entries of contributoryProperties.
+// An empty name is never contributory.
+//
+// NOTE(review): the comparison is exact and case-sensitive, so a differently spelled name for the same
+// property would not match here. Confirm that callers pass names in the same form as the listed entries.
+func IsContributoryProperty(propName string) bool {
+	if len(propName) == 0 {
+		return false
+	}
+
+	for i := range contributoryProperties {
+		if contributoryProperties[i] == propName {
+			return true
+		}
+	}
+	return false
+}
+
func FindCodePointRanges(propName, propVal string) ([]*CodePointRange, bool, error) {
if propName == "" {
propName = "gc"
diff --git a/ucd/property.go b/ucd/property.go
index 71bbcad..0b2ac62 100644
--- a/ucd/property.go
+++ b/ucd/property.go
@@ -1,5 +1,19 @@
package ucd
+// contributoryProperties lists the contributory properties maleeni uses internally; IsContributoryProperty
+// compares names against these entries verbatim. Property statuses are defined in the table linked below.
+//
+// https://unicode.org/reports/tr44/#Property_List_Table
+var contributoryProperties = []string{
+ "oalpha", // presumably Other_Alphabetic (alias OAlpha); confirm against PropertyAliases.txt
+ "olower", // presumably Other_Lowercase (alias OLower); confirm against PropertyAliases.txt
+ "oupper", // presumably Other_Uppercase (alias OUpper); confirm against PropertyAliases.txt
+}
+
+// ContributoryProperties returns the names of the contributory properties maleeni uses internally.
+//
+// The result is a fresh copy of the package-level list, so callers may modify it without affecting what
+// IsContributoryProperty matches against.
+func ContributoryProperties() []string {
+	props := make([]string, len(contributoryProperties))
+	copy(props, contributoryProperties)
+	return props
+}
+
// https://www.unicode.org/reports/tr44/#GC_Values_Table
var compositGeneralCategories = map[string][]string{
// Cased_Letter