aboutsummaryrefslogtreecommitdiff
path: root/compiler/parser.go
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-11-28 00:37:03 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-11-28 13:53:18 +0900
commitcb9d92f0b4e0097579f6e5da1dc6e2f063b532a9 (patch)
tree3d43aa9f3463f93451532d6dec9442a664d2d0a9 /compiler/parser.go
parentMove all UCD-related processes to ucd package (diff)
downloadtre-cb9d92f0b4e0097579f6e5da1dc6e2f063b532a9.tar.gz
tre-cb9d92f0b4e0097579f6e5da1dc6e2f063b532a9.tar.xz
Make contributory properties unavailable except for internal use
This change follows [UAX #44 5.13 Property APIs].

> The following subtypes of Unicode character properties should generally not be exposed in APIs,
> except in limited circumstances. They may not be useful, particularly in public API collections,
> and may instead prove misleading to the users of such API collections.
>
> * Contributory properties are not recommended for public APIs.
> ...

https://unicode.org/reports/tr44/#Property_APIs
Diffstat (limited to 'compiler/parser.go')
-rw-r--r--compiler/parser.go31
1 files changed, 30 insertions, 1 deletions
diff --git a/compiler/parser.go b/compiler/parser.go
index 980e65c..89c8301 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -251,14 +251,38 @@ type parser struct {
lastTok *token
incomplete bool
errMsgDetails string
+
+ // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that
+ // appear in property expressions.
+ //
+ // The contributory properties are not exposed, and users cannot use those properties because the parser
+ // follows [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid.
+ //
+ // isContributoryPropertyExposed is set to true only for the nested parser that parseCharProp creates to parse
+ // the pattern returned by ucd.NormalizeCharacterProperty. That parser needs to interpret derived properties
+ // internally because the derived properties consist of other properties that may contain the contributory properties.
+ //
+ // [UAX #44 5.13 Property APIs] says:
+ // > The following subtypes of Unicode character properties should generally not be exposed in APIs,
+ // > except in limited circumstances. They may not be useful, particularly in public API collections,
+ // > and may instead prove misleading to the users of such API collections.
+ // > * Contributory properties are not recommended for public APIs.
+ // > ...
+ // https://unicode.org/reports/tr44/#Property_APIs
+ isContributoryPropertyExposed bool
}
func newParser(src io.Reader) *parser {
return &parser{
- lex: newLexer(src),
+ lex: newLexer(src),
+ isContributoryPropertyExposed: false,
}
}
+func (p *parser) exposeContributoryProperty() {
+ p.isContributoryPropertyExposed = true
+}
+
func (p *parser) parse() (ast astNode, retErr error) {
defer func() {
err := recover()
@@ -548,6 +572,10 @@ func (p *parser) parseCharProp() astNode {
propName = ""
propVal = sym1
}
+ if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) {
+ p.errMsgDetails = propName
+ raiseSyntaxError(synErrCharPropUnsupported)
+ }
pat, err := ucd.NormalizeCharacterProperty(propName, propVal)
if err != nil {
p.errMsgDetails = fmt.Sprintf("%v", err)
@@ -555,6 +583,7 @@ func (p *parser) parseCharProp() astNode {
}
if pat != "" {
p := newParser(bytes.NewReader([]byte(pat)))
+ p.exposeContributoryProperty()
ast, err := p.parse()
if err != nil {
panic(err)