 compiler/ast.go        | 42
 compiler/ast_test.go   |  6
 compiler/lexer.go      | 26
 compiler/lexer_test.go | 12
 compiler/parser.go     | 12
 driver/lexer_test.go   | 40
 6 files changed, 117 insertions(+), 21 deletions(-)
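This change adds two postfix repetition operators to the regex compiler: `+` (one or more) and `?` (zero or one). `e+` is desugared at AST-construction time into the concatenation of e with e*, so the existing repeat machinery covers it, while `e?` becomes a dedicated nullable AST node. The lexer and parser learn the new tokens, and the compiler and driver tests are extended to match.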
diff --git a/compiler/ast.go b/compiler/ast.go
index 17054d0..e4609ac 100644
--- a/compiler/ast.go
+++ b/compiler/ast.go
@@ -338,6 +338,48 @@ func (n *repeatNode) last() symbolPositionSet {
	return s
}

+func newRepeatOneOrMoreNode(left astNode) *concatNode {
+	return newConcatNode(
+		left,
+		&repeatNode{
+			left: left,
+		})
+}
+
+type optionNode struct {
+	left astNode
+}
+
+func newOptionNode(left astNode) *optionNode {
+	return &optionNode{
+		left: left,
+	}
+}
+
+func (n *optionNode) String() string {
+	return "{type: option}"
+}
+
+func (n *optionNode) children() (astNode, astNode) {
+	return n.left, nil
+}
+
+func (n *optionNode) nullable() bool {
+	return true
+}
+
+func (n *optionNode) first() symbolPositionSet {
+	s := newSymbolPositionSet()
+	s.merge(n.left.first())
+	return s
+}
+
+func (n *optionNode) last() symbolPositionSet {
+	s := newSymbolPositionSet()
+	s.merge(n.left.last())
+	return s
+}
+
type followTable map[symbolPosition]symbolPositionSet

func genFollowTable(root astNode) followTable {
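A note on the methods above: they implement the standard followpos-construction rules (the Dragon Book construction this compiler already uses for `*` and concatenation). For a subexpression e:

	nullable(e?) = true          nullable(e+) = nullable(e)
	first(e?)    = first(e)      first(e+)    = first(e)
	last(e?)     = last(e)       last(e+)     = last(e)

Because e+ is lowered to concat(e, repeat(e)) instead of getting a node type of its own, genFollowTable needs no new case: the existing concat and repeat rules already produce the follow pairs that make the repeated part loop. optionNode is the only genuinely new shape, and since it contributes no follow pairs itself, nullable/first/last are all it has to define.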
diff --git a/compiler/ast_test.go b/compiler/ast_test.go
index 61d6064..735ccf8 100644
--- a/compiler/ast_test.go
+++ b/compiler/ast_test.go
@@ -102,6 +102,12 @@ func TestASTNode(t *testing.T) {
			first:    newSymbolPositionSet().add(1),
			last:     newSymbolPositionSet().add(1),
		},
+		{
+			root:     newOptionNode(newSymbolNode(nil, 0, 1)),
+			nullable: true,
+			first:    newSymbolPositionSet().add(1),
+			last:     newSymbolPositionSet().add(1),
+		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
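The added case pins down exactly what distinguishes `?` from a bare symbol: over a single symbol at position 1, nullable flips to true while first and last stay equal to the child's sets. Assuming a standard Go toolchain at the repository root, this table can be exercised on its own with:

	go test ./compiler -run TestASTNode -v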
diff --git a/compiler/lexer.go b/compiler/lexer.go
index 3e3cf35..91cd209 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -9,15 +9,17 @@ import (
type tokenKind string

const (
-	tokenKindChar       = tokenKind("char")
-	tokenKindAnyChar    = tokenKind(".")
-	tokenKindRepeat     = tokenKind("*")
-	tokenKindAlt        = tokenKind("|")
-	tokenKindGroupOpen  = tokenKind("(")
-	tokenKindGroupClose = tokenKind(")")
-	tokenKindBExpOpen   = tokenKind("[")
-	tokenKindBExpClose  = tokenKind("]")
-	tokenKindEOF        = tokenKind("eof")
+	tokenKindChar            = tokenKind("char")
+	tokenKindAnyChar         = tokenKind(".")
+	tokenKindRepeat          = tokenKind("*")
+	tokenKindRepeatOneOrMore = tokenKind("+")
+	tokenKindOption          = tokenKind("?")
+	tokenKindAlt             = tokenKind("|")
+	tokenKindGroupOpen       = tokenKind("(")
+	tokenKindGroupClose      = tokenKind(")")
+	tokenKindBExpOpen        = tokenKind("[")
+	tokenKindBExpClose       = tokenKind("]")
+	tokenKindEOF             = tokenKind("eof")
)

type token struct {
@@ -80,6 +82,10 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
	switch c {
	case '*':
		return newToken(tokenKindRepeat, nullChar), nil
+	case '+':
+		return newToken(tokenKindRepeatOneOrMore, nullChar), nil
+	case '?':
+		return newToken(tokenKindOption, nullChar), nil
	case '.':
		return newToken(tokenKindAnyChar, nullChar), nil
	case '|':
@@ -104,7 +110,7 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
		}
	}
	switch {
-	case c == '\\' || c == '.' || c == '*' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']':
+	case c == '\\' || c == '.' || c == '*' || c == '+' || c == '?' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']':
		return newToken(tokenKindChar, c), nil
	default:
		return nil, &SyntaxError{
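Two details of the lexer change are easy to miss. Each new operator needs both a token kind and a case in the default mode, and the escape set in the final hunk must grow in step: once `+` and `?` are special, `\+` and `\?` have to lex as literal characters, and without the added comparisons the default branch would reject them with a SyntaxError. Illustrative token streams (not part of the diff):

	a+    =>  char('a'), tokenKindRepeatOneOrMore
	a\+   =>  char('a'), char('+')
	a?    =>  char('a'), tokenKindOption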
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index 11cf043..e2e7d7a 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -31,10 +31,12 @@ func TestLexer(t *testing.T) {
		},
		{
			caption: "lexer can recognize the special characters",
-			src:     ".*|()[]",
+			src:     ".*+?|()[]",
			tokens: []*token{
				newToken(tokenKindAnyChar, nullChar),
				newToken(tokenKindRepeat, nullChar),
+				newToken(tokenKindRepeatOneOrMore, nullChar),
+				newToken(tokenKindOption, nullChar),
				newToken(tokenKindAlt, nullChar),
				newToken(tokenKindGroupOpen, nullChar),
				newToken(tokenKindGroupClose, nullChar),
@@ -45,11 +47,13 @@ func TestLexer(t *testing.T) {
		},
		{
			caption: "lexer can recognize the escape sequences",
-			src:     "\\\\\\.\\*\\|\\(\\)\\[\\]",
+			src:     "\\\\\\.\\*\\+\\?\\|\\(\\)\\[\\]",
			tokens: []*token{
				newToken(tokenKindChar, '\\'),
				newToken(tokenKindChar, '.'),
				newToken(tokenKindChar, '*'),
+				newToken(tokenKindChar, '+'),
+				newToken(tokenKindChar, '?'),
				newToken(tokenKindChar, '|'),
				newToken(tokenKindChar, '('),
				newToken(tokenKindChar, ')'),
@@ -60,12 +64,14 @@ func TestLexer(t *testing.T) {
		},
		{
			caption: "in a bracket expression, the special characters are also handled as normal characters",
-			src:     "[\\\\.*|()[\\]].*|()][",
+			src:     "[\\\\.*+?|()[\\]].*|()][",
			tokens: []*token{
				newToken(tokenKindBExpOpen, nullChar),
				newToken(tokenKindChar, '\\'),
				newToken(tokenKindChar, '.'),
				newToken(tokenKindChar, '*'),
+				newToken(tokenKindChar, '+'),
+				newToken(tokenKindChar, '?'),
				newToken(tokenKindChar, '|'),
				newToken(tokenKindChar, '('),
				newToken(tokenKindChar, ')'),
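Conversely, this case confirms the lexer's bracket mode needs no change at all: `+` and `?` were never special inside `[...]`, where only `\` and `]` require escaping, so they pass through as ordinary char tokens exactly as `.` and `*` already did.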
diff --git a/compiler/parser.go b/compiler/parser.go
index ede601d..04cd8ad 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -144,10 +144,16 @@ func (p *parser) parseConcat() astNode {
func (p *parser) parseRepeat() astNode {
	group := p.parseGroup()
-	if !p.consume(tokenKindRepeat) {
-		return group
+	if p.consume(tokenKindRepeat) {
+		return newRepeatNode(group)
	}
-	return newRepeatNode(group)
+	if p.consume(tokenKindRepeatOneOrMore) {
+		return newRepeatOneOrMoreNode(group)
+	}
+	if p.consume(tokenKindOption) {
+		return newOptionNode(group)
+	}
+	return group
}

func (p *parser) parseGroup() astNode {
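The restructured parseRepeat now implements the production

	repeat → group ('*' | '+' | '?')?

consuming at most one postfix operator per group; the order of the three checks is immaterial since the token kinds are mutually exclusive. Because `+` and `?` lower to existing node shapes (a concat wrapping a repeat, and the new option node), the DFA construction downstream is untouched.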
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 133b758..283d5fe 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -19,15 +19,15 @@ func TestLexer_Next(t *testing.T) {
			lspec: &spec.LexSpec{
				Entries: []*spec.LexEntry{
					spec.NewLexEntry("t1", "(a|b)*abb"),
-					spec.NewLexEntry("t2", " *"),
+					spec.NewLexEntry("t2", " +"),
				},
			},
-			src: "abb aabb aaabb babb bbabb abbbabb",
+			src: "abb aabb  aaabb babb bbabb abbbabb",
			tokens: []*Token{
				newToken(1, "t1", []byte("abb")),
				newToken(2, "t2", []byte(" ")),
				newToken(1, "t1", []byte("aabb")),
-				newToken(2, "t2", []byte(" ")),
+				newToken(2, "t2", []byte("  ")),
				newToken(1, "t1", []byte("aaabb")),
				newToken(2, "t2", []byte(" ")),
				newToken(1, "t1", []byte("babb")),
@@ -41,6 +41,34 @@ func TestLexer_Next(t *testing.T) {
		{
			lspec: &spec.LexSpec{
				Entries: []*spec.LexEntry{
+					spec.NewLexEntry("t1", "b?a+"),
+					spec.NewLexEntry("t2", "(ab)?(cd)+"),
+					spec.NewLexEntry("t3", " +"),
+				},
+			},
+			src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
+			tokens: []*Token{
+				newToken(1, "t1", []byte("ba")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(1, "t1", []byte("baaa")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(1, "t1", []byte("a")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(1, "t1", []byte("aaa")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(2, "t2", []byte("abcd")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(2, "t2", []byte("abcdcdcd")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(2, "t2", []byte("cd")),
+				newToken(3, "t3", []byte(" ")),
+				newToken(2, "t2", []byte("cdcdcd")),
+				newEOFToken(),
+			},
+		},
+		{
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
					spec.NewLexEntry("t1", "."),
				},
			},
@@ -85,15 +113,17 @@ func TestLexer_Next(t *testing.T) {
		{
			lspec: &spec.LexSpec{
				Entries: []*spec.LexEntry{
-					spec.NewLexEntry("t1", "[ab.*|()[\\]]"),
+					spec.NewLexEntry("t1", "[ab.*+?|()[\\]]"),
				},
			},
-			src: "ab.*|()[]",
+			src: "ab.*+?|()[]",
			tokens: []*Token{
				newToken(1, "t1", []byte("a")),
				newToken(1, "t1", []byte("b")),
				newToken(1, "t1", []byte(".")),
				newToken(1, "t1", []byte("*")),
+				newToken(1, "t1", []byte("+")),
+				newToken(1, "t1", []byte("?")),
				newToken(1, "t1", []byte("|")),
				newToken(1, "t1", []byte("(")),
				newToken(1, "t1", []byte(")")),