From cb9d92f0b4e0097579f6e5da1dc6e2f063b532a9 Mon Sep 17 00:00:00 2001
From: Ryo Nihei
Date: Sun, 28 Nov 2021 00:37:03 +0900
Subject: Make contributory properties unavailable except internal use

This change follows [UAX #44 5.13 Property APIs].

> The following subtypes of Unicode character properties should generally not be exposed in APIs,
> except in limited circumstances. They may not be useful, particularly in public API collections,
> and may instead prove misleading to the users of such API collections.
>
> * Contributory properties are not recommended for public APIs.
> ...

https://unicode.org/reports/tr44/#Property_APIs
---
 compiler/parser.go | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

(limited to 'compiler/parser.go')

diff --git a/compiler/parser.go b/compiler/parser.go
index 980e65c..89c8301 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -251,14 +251,38 @@ type parser struct {
 	lastTok       *token
 	incomplete    bool
 	errMsgDetails string
+
+	// If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that
+	// appear in property expressions.
+	//
+	// The contributory properties are not exposed, and users cannot use those properties because the parser
+	// follows [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid.
+	//
+	// isContributoryPropertyExposed is set to true when the parser is generated recursively. The parser needs to
+	// interpret derived properties internally because the derived properties consist of other properties that
+	// may contain the contributory properties.
+	//
+	// [UAX #44 5.13 Property APIs] says:
+	// > The following subtypes of Unicode character properties should generally not be exposed in APIs,
+	// > except in limited circumstances. They may not be useful, particularly in public API collections,
+	// > and may instead prove misleading to the users of such API collections.
+	// > * Contributory properties are not recommended for public APIs.
+	// > ...
+	// https://unicode.org/reports/tr44/#Property_APIs
+	isContributoryPropertyExposed bool
 }
 
 func newParser(src io.Reader) *parser {
 	return &parser{
-		lex: newLexer(src),
+		lex:                           newLexer(src),
+		isContributoryPropertyExposed: false,
 	}
 }
 
+func (p *parser) exposeContributoryProperty() {
+	p.isContributoryPropertyExposed = true
+}
+
 func (p *parser) parse() (ast astNode, retErr error) {
 	defer func() {
 		err := recover()
@@ -548,6 +572,10 @@ func (p *parser) parseCharProp() astNode {
 		propName = ""
 		propVal = sym1
 	}
+	if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) {
+		p.errMsgDetails = propName
+		raiseSyntaxError(synErrCharPropUnsupported)
+	}
 	pat, err := ucd.NormalizeCharacterProperty(propName, propVal)
 	if err != nil {
 		p.errMsgDetails = fmt.Sprintf("%v", err)
@@ -555,6 +583,7 @@ func (p *parser) parseCharProp() astNode {
 	}
 	if pat != "" {
 		p := newParser(bytes.NewReader([]byte(pat)))
+		p.exposeContributoryProperty()
 		ast, err := p.parse()
 		if err != nil {
 			panic(err)
-- 
cgit v1.2.3