diff options
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/parser.go | 31 | ||||
-rw-r--r-- | compiler/parser_test.go | 32 |
2 files changed, 62 insertions, 1 deletion
diff --git a/compiler/parser.go b/compiler/parser.go index 980e65c..89c8301 100644 --- a/compiler/parser.go +++ b/compiler/parser.go @@ -251,14 +251,38 @@ type parser struct { lastTok *token incomplete bool errMsgDetails string + + // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that + // appear in property expressions. + // + // The contributory properties are not exposed, and users cannot use those properties because the parser + // follows [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid. + // + // isContributoryPropertyExposed is set to true when the parser is generated recursively. The parser needs to + // interpret derived properties internally because the derived properties consist of other properties that + // may contain the contributory properties. + // + // [UAX #44 5.13 Property APIs] says: + // > The following subtypes of Unicode character properties should generally not be exposed in APIs, + // > except in limited circumstances. They may not be useful, particularly in public API collections, + // > and may instead prove misleading to the users of such API collections. + // > * Contributory properties are not recommended for public APIs. + // > ... 
+ // https://unicode.org/reports/tr44/#Property_APIs + isContributoryPropertyExposed bool } func newParser(src io.Reader) *parser { return &parser{ - lex: newLexer(src), + lex: newLexer(src), + isContributoryPropertyExposed: false, } } +func (p *parser) exposeContributoryProperty() { + p.isContributoryPropertyExposed = true +} + func (p *parser) parse() (ast astNode, retErr error) { defer func() { err := recover() @@ -548,6 +572,10 @@ func (p *parser) parseCharProp() astNode { propName = "" propVal = sym1 } + if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) { + p.errMsgDetails = propName + raiseSyntaxError(synErrCharPropUnsupported) + } pat, err := ucd.NormalizeCharacterProperty(propName, propVal) if err != nil { p.errMsgDetails = fmt.Sprintf("%v", err) @@ -555,6 +583,7 @@ func (p *parser) parseCharProp() astNode { } if pat != "" { p := newParser(bytes.NewReader([]byte(pat))) + p.exposeContributoryProperty() ast, err := p.parse() if err != nil { panic(err) diff --git a/compiler/parser_test.go b/compiler/parser_test.go index 7c33fb4..e4a6fe2 100644 --- a/compiler/parser_test.go +++ b/compiler/parser_test.go @@ -6,6 +6,7 @@ import ( "testing" "github.com/nihei9/maleeni/spec" + "github.com/nihei9/maleeni/ucd" ) func symPos(n uint16) symbolPosition { @@ -1241,6 +1242,37 @@ func TestParse(t *testing.T) { } } +func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { + for _, cProp := range ucd.ContributoryProperties() { + t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { + ast, _, err := parse([]*patternEntry{ + { + id: spec.LexModeKindIDMin, + pattern: []byte(fmt.Sprintf(`\p{%v=yes}`, cProp)), + }, + }, nil) + if err == nil { + t.Fatalf("expected syntax error; got: nil") + } + parseErrs, ok := err.(*ParseErrors) + if !ok { + t.Fatalf("expected ParseErrors; got: %v (type: %T)", err, err) + } + parseErr := parseErrs.Errors[0].Cause + synErr, ok := parseErr.(*SyntaxError) + if !ok { + t.Fatalf("expected SyntaxError; got: %v (type: %T)", 
parseErr, parseErr) + } + if synErr != synErrCharPropUnsupported { + t.Fatalf("unexpected syntax error; want: %v, got: %v", synErrCharPropUnsupported, synErr) + } + if ast != nil { + t.Fatalf("ast is not nil") + } + }) + } +} + func TestParse_FollowAndSymbolTable(t *testing.T) { root, symTab, err := parse([]*patternEntry{ { |