diff options
-rw-r--r-- | compiler/parser/error.go | 4 | ||||
-rw-r--r-- | compiler/parser/lexer.go | 13 | ||||
-rw-r--r-- | compiler/parser/parser.go | 37 | ||||
-rw-r--r-- | compiler/parser/parser_test.go | 63 | ||||
-rw-r--r-- | compiler/parser/tree.go | 21 |
5 files changed, 105 insertions, 33 deletions
diff --git a/compiler/parser/error.go b/compiler/parser/error.go index a90e3b3..8be6600 100644 --- a/compiler/parser/error.go +++ b/compiler/parser/error.go @@ -4,7 +4,7 @@ import "fmt" var ( ParseErr = fmt.Errorf("parse error") - + // lexical errors synErrIncompletedEscSeq = fmt.Errorf("incompleted escape sequence; unexpected EOF following \\") synErrInvalidEscSeq = fmt.Errorf("invalid escape sequence") @@ -25,6 +25,8 @@ var ( synErrBExpUnclosed = fmt.Errorf("unclosed bracket expression") synErrBExpInvalidForm = fmt.Errorf("invalid bracket expression") synErrRangeInvalidOrder = fmt.Errorf("a range expression with invalid order") + synErrRangePropIsUnavailable = fmt.Errorf("a property expression is unavailable in a range expression") + synErrRangeInvalidForm = fmt.Errorf("invalid range expression") synErrCPExpInvalidForm = fmt.Errorf("invalid code point expression") synErrCPExpOutOfRange = fmt.Errorf("a code point must be between U+0000 to U+10FFFF") synErrCharPropExpInvalidForm = fmt.Errorf("invalid character property expression") diff --git a/compiler/parser/lexer.go b/compiler/parser/lexer.go index d2237a5..01cebeb 100644 --- a/compiler/parser/lexer.go +++ b/compiler/parser/lexer.go @@ -176,18 +176,19 @@ func (l *lexer) next() (*token, error) { if err != nil { return nil, err } - switch tok.kind { - case tokenKindBExpClose: - l.modeStack.pop() - case tokenKindCharRange: - l.rangeState = rangeStateExpectRangeTerminator - case tokenKindChar: + if tok.kind == tokenKindChar || tok.kind == tokenKindCodePointLeader || tok.kind == tokenKindCharPropLeader { switch l.rangeState { case rangeStateReady: l.rangeState = rangeStateReadRangeInitiator case rangeStateExpectRangeTerminator: l.rangeState = rangeStateReady } + } + switch tok.kind { + case tokenKindBExpClose: + l.modeStack.pop() + case tokenKindCharRange: + l.rangeState = rangeStateExpectRangeTerminator case tokenKindCodePointLeader: l.modeStack.push(lexerModeCPExp) case tokenKindCharPropLeader: diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go index 4cf0e66..70d1ce6 100644 --- a/compiler/parser/parser.go +++ b/compiler/parser/parser.go @@ -16,7 +16,7 @@ type PatternEntry struct { } type parser struct { - kind spec.LexKindName + kind spec.LexKindName lex *lexer peekedTok *token lastTok *token @@ -46,7 +46,7 @@ type parser struct { func NewParser(kind spec.LexKindName, src io.Reader) *parser { return &parser{ - kind: kind, + kind: kind, lex: newLexer(src), isContributoryPropertyExposed: false, } @@ -243,27 +243,40 @@ func (p *parser) parseSingleChar() CPTree { } func (p *parser) parseBExpElem() CPTree { - if p.consume(tokenKindCodePointLeader) { - return p.parseCodePoint() - } - if p.consume(tokenKindCharPropLeader) { - return p.parseCharProp() + var left CPTree + switch { + case p.consume(tokenKindCodePointLeader): + left = p.parseCodePoint() + case p.consume(tokenKindCharPropLeader): + left = p.parseCharProp() + if p.consume(tokenKindCharRange) { + p.raiseParseError(synErrRangePropIsUnavailable, "") + } + default: + left = p.parseNormalChar() } - left := p.parseNormalChar() if left == nil { return nil } if !p.consume(tokenKindCharRange) { return left } - right := p.parseNormalChar() + var right CPTree + switch { + case p.consume(tokenKindCodePointLeader): + right = p.parseCodePoint() + case p.consume(tokenKindCharPropLeader): + p.raiseParseError(synErrRangePropIsUnavailable, "") + default: + right = p.parseNormalChar() + } if right == nil { - panic(fmt.Errorf("invalid range expression")) + p.raiseParseError(synErrRangeInvalidForm, "") } from, _, _ := left.Range() _, to, _ := right.Range() if !isValidOrder(from, to) { - p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("[%v-%v]", from, to)) + p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("%X..%X", from, to)) } return newRangeSymbolNode(from, to) } @@ -484,7 +497,7 @@ func genAltNode(cs ...CPTree) CPTree { func (p *parser) expect(expected tokenKind) { if !p.consume(expected) { tok := p.peekedTok - p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("unexpected token; expected: %v, actual: %v", expected, tok.kind)) + p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("expected: %v, actual: %v", expected, tok.kind)) } } diff --git a/compiler/parser/parser_test.go b/compiler/parser/parser_test.go index beeef1b..1fa0489 100644 --- a/compiler/parser/parser_test.go +++ b/compiler/parser/parser_test.go @@ -408,10 +408,30 @@ func TestParse(t *testing.T) { ast: newSymbolNode('N'), }, { + pattern: "[\\u{0061}-\\u{007A}]", + ast: newRangeSymbolNode('a', 'z'), + }, + { pattern: "[\\p{Lu}]", skipTestAST: true, }, { + pattern: "[a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[z-a]", + syntaxError: synErrRangeInvalidOrder, + }, + { pattern: "a[]", syntaxError: synErrBExpNoElem, }, @@ -424,6 +444,34 @@ func TestParse(t *testing.T) { syntaxError: synErrBExpNoElem, }, { + pattern: "[^a-z]", + skipTestAST: true, + }, + { + pattern: "[^\\u{004E}]", + skipTestAST: true, + }, + { + pattern: "[^\\u{0061}-\\u{007A}]", + skipTestAST: true, + }, + { + pattern: "[^\\p{Lu}]", + skipTestAST: true, + }, + { + pattern: "[^a-\\p{Lu}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-z]", + syntaxError: synErrRangePropIsUnavailable, + }, + { + pattern: "[^\\p{Lu}-\\p{Ll}]", + syntaxError: synErrRangePropIsUnavailable, + }, + { pattern: "[^]", ast: newSymbolNode('^'), }, @@ -857,23 +905,24 @@ func TestParse(t *testing.T) { p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) root, err := p.Parse() if tt.syntaxError != nil { - // printCPTree(os.Stdout, ast, "", "") - if err == nil { - t.Fatalf("expected syntax error: got: nil") + // printCPTree(os.Stdout, root, "", "") + if err != ParseErr { + t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) } _, synErr := p.Error() if synErr != tt.syntaxError { t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) } if root != nil { - t.Fatalf("tree is not nil") + t.Fatalf("tree must be nil") } } else { if err != nil { - t.Fatal(err) + detail, cause := p.Error() + t.Fatalf("%v: %v: %v", err, cause, detail) } if root == nil { - t.Fatal("tree is nil") + t.Fatal("tree must be non-nil") } complete, err := ApplyFragments(root, fragmentTrees) @@ -884,7 +933,7 @@ func TestParse(t *testing.T) { t.Fatalf("incomplete fragments") } - // printCPTree(os.Stdout, ast, "", "") + // printCPTree(os.Stdout, root, "", "") if !tt.skipTestAST { r := root.(*rootNode) testAST(t, tt.ast, r.tree) diff --git a/compiler/parser/tree.go b/compiler/parser/tree.go index 77d2e46..b5fb723 100644 --- a/compiler/parser/tree.go +++ b/compiler/parser/tree.go @@ -159,7 +159,7 @@ func newRangeSymbolNode(from, to rune) *symbolNode { } func (n *symbolNode) String() string { - return fmt.Sprintf("symbol: %v - %v", n.From, n.To) + return fmt.Sprintf("symbol: %X..%X", n.From, n.To) } func (n *symbolNode) Range() (rune, rune, bool) { @@ -424,13 +424,20 @@ func printCPTree(w io.Writer, t CPTree, ruledLine string, childRuledLinePrefix s return } fmt.Fprintf(w, "%v%v\n", ruledLine, t) - left, right := t.children() children := []CPTree{} - if left != nil { - children = append(children, left) - } - if right != nil { - children = append(children, right) + switch n := t.(type) { + case *rootNode: + children = append(children, n.tree) + case *fragmentNode: + children = append(children, n.tree) + default: + left, right := t.children() + if left != nil { + children = append(children, left) + } + if right != nil { + children = append(children, right) + } } num := len(children) for i, child := range children { |