about | summary | refs | log | tree | commit | diff
path: root/compiler/parser
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- compiler/parser/error.go 4
-rw-r--r-- compiler/parser/lexer.go 13
-rw-r--r-- compiler/parser/parser.go 37
-rw-r--r-- compiler/parser/parser_test.go 63
-rw-r--r-- compiler/parser/tree.go 21
5 files changed, 105 insertions, 33 deletions
diff --git a/compiler/parser/error.go b/compiler/parser/error.go
index a90e3b3..8be6600 100644
--- a/compiler/parser/error.go
+++ b/compiler/parser/error.go
@@ -4,7 +4,7 @@ import "fmt"
var (
ParseErr = fmt.Errorf("parse error")
-
+
// lexical errors
synErrIncompletedEscSeq = fmt.Errorf("incompleted escape sequence; unexpected EOF following \\")
synErrInvalidEscSeq = fmt.Errorf("invalid escape sequence")
@@ -25,6 +25,8 @@ var (
synErrBExpUnclosed = fmt.Errorf("unclosed bracket expression")
synErrBExpInvalidForm = fmt.Errorf("invalid bracket expression")
synErrRangeInvalidOrder = fmt.Errorf("a range expression with invalid order")
+ synErrRangePropIsUnavailable = fmt.Errorf("a property expression is unavailable in a range expression")
+ synErrRangeInvalidForm = fmt.Errorf("invalid range expression")
synErrCPExpInvalidForm = fmt.Errorf("invalid code point expression")
synErrCPExpOutOfRange = fmt.Errorf("a code point must be between U+0000 to U+10FFFF")
synErrCharPropExpInvalidForm = fmt.Errorf("invalid character property expression")
diff --git a/compiler/parser/lexer.go b/compiler/parser/lexer.go
index d2237a5..01cebeb 100644
--- a/compiler/parser/lexer.go
+++ b/compiler/parser/lexer.go
@@ -176,18 +176,19 @@ func (l *lexer) next() (*token, error) {
if err != nil {
return nil, err
}
- switch tok.kind {
- case tokenKindBExpClose:
- l.modeStack.pop()
- case tokenKindCharRange:
- l.rangeState = rangeStateExpectRangeTerminator
- case tokenKindChar:
+ if tok.kind == tokenKindChar || tok.kind == tokenKindCodePointLeader || tok.kind == tokenKindCharPropLeader {
switch l.rangeState {
case rangeStateReady:
l.rangeState = rangeStateReadRangeInitiator
case rangeStateExpectRangeTerminator:
l.rangeState = rangeStateReady
}
+ }
+ switch tok.kind {
+ case tokenKindBExpClose:
+ l.modeStack.pop()
+ case tokenKindCharRange:
+ l.rangeState = rangeStateExpectRangeTerminator
case tokenKindCodePointLeader:
l.modeStack.push(lexerModeCPExp)
case tokenKindCharPropLeader:
diff --git a/compiler/parser/parser.go b/compiler/parser/parser.go
index 4cf0e66..70d1ce6 100644
--- a/compiler/parser/parser.go
+++ b/compiler/parser/parser.go
@@ -16,7 +16,7 @@ type PatternEntry struct {
}
type parser struct {
- kind spec.LexKindName
+ kind spec.LexKindName
lex *lexer
peekedTok *token
lastTok *token
@@ -46,7 +46,7 @@ type parser struct {
func NewParser(kind spec.LexKindName, src io.Reader) *parser {
return &parser{
- kind: kind,
+ kind: kind,
lex: newLexer(src),
isContributoryPropertyExposed: false,
}
@@ -243,27 +243,40 @@ func (p *parser) parseSingleChar() CPTree {
}
func (p *parser) parseBExpElem() CPTree {
- if p.consume(tokenKindCodePointLeader) {
- return p.parseCodePoint()
- }
- if p.consume(tokenKindCharPropLeader) {
- return p.parseCharProp()
+ var left CPTree
+ switch {
+ case p.consume(tokenKindCodePointLeader):
+ left = p.parseCodePoint()
+ case p.consume(tokenKindCharPropLeader):
+ left = p.parseCharProp()
+ if p.consume(tokenKindCharRange) {
+ p.raiseParseError(synErrRangePropIsUnavailable, "")
+ }
+ default:
+ left = p.parseNormalChar()
}
- left := p.parseNormalChar()
if left == nil {
return nil
}
if !p.consume(tokenKindCharRange) {
return left
}
- right := p.parseNormalChar()
+ var right CPTree
+ switch {
+ case p.consume(tokenKindCodePointLeader):
+ right = p.parseCodePoint()
+ case p.consume(tokenKindCharPropLeader):
+ p.raiseParseError(synErrRangePropIsUnavailable, "")
+ default:
+ right = p.parseNormalChar()
+ }
if right == nil {
- panic(fmt.Errorf("invalid range expression"))
+ p.raiseParseError(synErrRangeInvalidForm, "")
}
from, _, _ := left.Range()
_, to, _ := right.Range()
if !isValidOrder(from, to) {
- p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("[%v-%v]", from, to))
+ p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("%X..%X", from, to))
}
return newRangeSymbolNode(from, to)
}
@@ -484,7 +497,7 @@ func genAltNode(cs ...CPTree) CPTree {
func (p *parser) expect(expected tokenKind) {
if !p.consume(expected) {
tok := p.peekedTok
- p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("unexpected token; expected: %v, actual: %v", expected, tok.kind))
+ p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("expected: %v, actual: %v", expected, tok.kind))
}
}
diff --git a/compiler/parser/parser_test.go b/compiler/parser/parser_test.go
index beeef1b..1fa0489 100644
--- a/compiler/parser/parser_test.go
+++ b/compiler/parser/parser_test.go
@@ -408,10 +408,30 @@ func TestParse(t *testing.T) {
ast: newSymbolNode('N'),
},
{
+ pattern: "[\\u{0061}-\\u{007A}]",
+ ast: newRangeSymbolNode('a', 'z'),
+ },
+ {
pattern: "[\\p{Lu}]",
skipTestAST: true,
},
{
+ pattern: "[a-\\p{Lu}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[\\p{Lu}-z]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[\\p{Lu}-\\p{Ll}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[z-a]",
+ syntaxError: synErrRangeInvalidOrder,
+ },
+ {
pattern: "a[]",
syntaxError: synErrBExpNoElem,
},
@@ -424,6 +444,34 @@ func TestParse(t *testing.T) {
syntaxError: synErrBExpNoElem,
},
{
+ pattern: "[^a-z]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[^\\u{004E}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[^\\u{0061}-\\u{007A}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[^\\p{Lu}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[^a-\\p{Lu}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\p{Lu}-z]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\p{Lu}-\\p{Ll}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
pattern: "[^]",
ast: newSymbolNode('^'),
},
@@ -857,23 +905,24 @@ func TestParse(t *testing.T) {
p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern))
root, err := p.Parse()
if tt.syntaxError != nil {
- // printCPTree(os.Stdout, ast, "", "")
- if err == nil {
- t.Fatalf("expected syntax error: got: nil")
+ // printCPTree(os.Stdout, root, "", "")
+ if err != ParseErr {
+ t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
}
_, synErr := p.Error()
if synErr != tt.syntaxError {
t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr)
}
if root != nil {
- t.Fatalf("tree is not nil")
+ t.Fatalf("tree must be nil")
}
} else {
if err != nil {
- t.Fatal(err)
+ detail, cause := p.Error()
+ t.Fatalf("%v: %v: %v", err, cause, detail)
}
if root == nil {
- t.Fatal("tree is nil")
+ t.Fatal("tree must be non-nil")
}
complete, err := ApplyFragments(root, fragmentTrees)
@@ -884,7 +933,7 @@ func TestParse(t *testing.T) {
t.Fatalf("incomplete fragments")
}
- // printCPTree(os.Stdout, ast, "", "")
+ // printCPTree(os.Stdout, root, "", "")
if !tt.skipTestAST {
r := root.(*rootNode)
testAST(t, tt.ast, r.tree)
diff --git a/compiler/parser/tree.go b/compiler/parser/tree.go
index 77d2e46..b5fb723 100644
--- a/compiler/parser/tree.go
+++ b/compiler/parser/tree.go
@@ -159,7 +159,7 @@ func newRangeSymbolNode(from, to rune) *symbolNode {
}
func (n *symbolNode) String() string {
- return fmt.Sprintf("symbol: %v - %v", n.From, n.To)
+ return fmt.Sprintf("symbol: %X..%X", n.From, n.To)
}
func (n *symbolNode) Range() (rune, rune, bool) {
@@ -424,13 +424,20 @@ func printCPTree(w io.Writer, t CPTree, ruledLine string, childRuledLinePrefix s
return
}
fmt.Fprintf(w, "%v%v\n", ruledLine, t)
- left, right := t.children()
children := []CPTree{}
- if left != nil {
- children = append(children, left)
- }
- if right != nil {
- children = append(children, right)
+ switch n := t.(type) {
+ case *rootNode:
+ children = append(children, n.tree)
+ case *fragmentNode:
+ children = append(children, n.tree)
+ default:
+ left, right := t.children()
+ if left != nil {
+ children = append(children, left)
+ }
+ if right != nil {
+ children = append(children, right)
+ }
}
num := len(children)
for i, child := range children {