diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-11-28 00:37:03 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-11-28 13:53:18 +0900 |
commit | cb9d92f0b4e0097579f6e5da1dc6e2f063b532a9 (patch) | |
tree | 3d43aa9f3463f93451532d6dec9442a664d2d0a9 /compiler/parser.go | |
parent | Move all UCD-related processes to ucd package (diff) | |
download | tre-cb9d92f0b4e0097579f6e5da1dc6e2f063b532a9.tar.gz tre-cb9d92f0b4e0097579f6e5da1dc6e2f063b532a9.tar.xz |
Make contributory properties unavailable except for internal use
This change follows [UAX #44 5.13 Property APIs].
> The following subtypes of Unicode character properties should generally not be exposed in APIs,
> except in limited circumstances. They may not be useful, particularly in public API collections,
> and may instead prove misleading to the users of such API collections.
>
> * Contributory properties are not recommended for public APIs.
> ...
https://unicode.org/reports/tr44/#Property_APIs
Diffstat (limited to 'compiler/parser.go')
-rw-r--r-- | compiler/parser.go | 31 |
1 files changed, 30 insertions, 1 deletions
diff --git a/compiler/parser.go b/compiler/parser.go index 980e65c..89c8301 100644 --- a/compiler/parser.go +++ b/compiler/parser.go @@ -251,14 +251,38 @@ type parser struct { lastTok *token incomplete bool errMsgDetails string + + // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that + // appear in property expressions. + // + // The contributory properties are not exposed, and users cannot use those properties because the parser + // follows [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid. + // + // isContributoryPropertyExposed is set to true when the parser is generated recursively. The parser needs to + // interpret derived properties internally because the derived properties consist of other properties that + // may contain the contributory properties. + // + // [UAX #44 5.13 Property APIs] says: + // > The following subtypes of Unicode character properties should generally not be exposed in APIs, + // > except in limited circumstances. They may not be useful, particularly in public API collections, + // > and may instead prove misleading to the users of such API collections. + // > * Contributory properties are not recommended for public APIs. + // > ... 
+ // https://unicode.org/reports/tr44/#Property_APIs + isContributoryPropertyExposed bool } func newParser(src io.Reader) *parser { return &parser{ - lex: newLexer(src), + lex: newLexer(src), + isContributoryPropertyExposed: false, } } +func (p *parser) exposeContributoryProperty() { + p.isContributoryPropertyExposed = true +} + func (p *parser) parse() (ast astNode, retErr error) { defer func() { err := recover() @@ -548,6 +572,10 @@ func (p *parser) parseCharProp() astNode { propName = "" propVal = sym1 } + if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) { + p.errMsgDetails = propName + raiseSyntaxError(synErrCharPropUnsupported) + } pat, err := ucd.NormalizeCharacterProperty(propName, propVal) if err != nil { p.errMsgDetails = fmt.Sprintf("%v", err) @@ -555,6 +583,7 @@ func (p *parser) parseCharProp() astNode { } if pat != "" { p := newParser(bytes.NewReader([]byte(pat))) + p.exposeContributoryProperty() ast, err := p.parse() if err != nil { panic(err) |