aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-04-11 20:12:54 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-04-12 22:43:29 +0900
commitfd1e20085306c60520d6b44938097cb3145c35f0 (patch)
tree608a89ad04673394fb8eb364c32c3cf86591d204
parentFix grammar the parser accepts (diff)
downloadtre-fd1e20085306c60520d6b44938097cb3145c35f0.tar.gz
tre-fd1e20085306c60520d6b44938097cb3145c35f0.tar.xz
Increase the maximum number of symbol positions per pattern
This commit increases the maximum number of symbol positions per pattern to 2^15 (= 32,768). When the limit is exceeded, the parse method returns an error.
-rw-r--r--compiler/ast.go58
-rw-r--r--compiler/ast_test.go73
-rw-r--r--compiler/dfa_test.go16
-rw-r--r--compiler/parser.go5
-rw-r--r--compiler/parser_test.go16
5 files changed, 139 insertions, 29 deletions
diff --git a/compiler/ast.go b/compiler/ast.go
index 803d5a9..3c9624c 100644
--- a/compiler/ast.go
+++ b/compiler/ast.go
@@ -7,40 +7,46 @@ import (
"strings"
)
-type symbolPosition uint8
+type symbolPosition uint16
const (
- symbolPositionNil = symbolPosition(0)
+ symbolPositionNil = symbolPosition(0x0000) // 0000 0000 0000 0000
- symbolPositionMaskSymbol = uint8(0x00) // 0000 0000
- symbolPositionMaskEndMark = uint8(0x80) // 1000 0000
+ symbolPositionMin = uint16(0x0001) // 0000 0000 0000 0001
+ symbolPositionMax = uint16(0x7fff) // 0111 1111 1111 1111
- symbolPositionMaskValue = uint8(0x7f) // 0111 1111
+ symbolPositionMaskSymbol = uint16(0x0000) // 0000 0000 0000 0000
+ symbolPositionMaskEndMark = uint16(0x8000) // 1000 0000 0000 0000
+
+ symbolPositionMaskValue = uint16(0x7fff) // 0111 1111 1111 1111
)
-func newSymbolPosition(n uint8, endMark bool) symbolPosition {
+func newSymbolPosition(n uint16, endMark bool) (symbolPosition, error) {
+ if n < symbolPositionMin || n > symbolPositionMax {
+ return symbolPositionNil, fmt.Errorf("symbol position must be within %v to %v; n: %v, endMark: %v", symbolPositionMin, symbolPositionMax, n, endMark)
+ }
if endMark {
- return symbolPosition(n | symbolPositionMaskEndMark)
+ return symbolPosition(n | symbolPositionMaskEndMark), nil
}
- return symbolPosition(n | symbolPositionMaskSymbol)
+ return symbolPosition(n | symbolPositionMaskSymbol), nil
}
func (p symbolPosition) String() string {
if p.isEndMark() {
- return fmt.Sprintf("end#%v", uint8(p)&symbolPositionMaskValue)
+ return fmt.Sprintf("end#%v", uint16(p)&symbolPositionMaskValue)
}
- return fmt.Sprintf("sym#%v", uint8(p)&symbolPositionMaskValue)
+ return fmt.Sprintf("sym#%v", uint16(p)&symbolPositionMaskValue)
}
func (p symbolPosition) isEndMark() bool {
- if uint8(p)&symbolPositionMaskEndMark > 1 {
+ if uint16(p)&symbolPositionMaskEndMark > 1 {
return true
}
return false
}
-func (p symbolPosition) describe() (uint8, bool) {
- v := uint8(p) & symbolPositionMaskValue
+func (p symbolPosition) describe() (uint16, bool) {
+ v := uint16(p) & symbolPositionMaskValue
if p.isEndMark() {
return v, true
}
@@ -421,24 +427,36 @@ func calcFollow(follow followTable, ast astNode) {
}
}
-func positionSymbols(node astNode, n uint8) uint8 {
+func positionSymbols(node astNode, n uint16) (uint16, error) {
if node == nil {
- return n
+ return n, nil
}
l, r := node.children()
p := n
- p = positionSymbols(l, p)
- p = positionSymbols(r, p)
+ p, err := positionSymbols(l, p)
+ if err != nil {
+ return p, err
+ }
+ p, err = positionSymbols(r, p)
+ if err != nil {
+ return p, err
+ }
switch n := node.(type) {
case *symbolNode:
- n.pos = newSymbolPosition(p, false)
+ n.pos, err = newSymbolPosition(p, false)
+ if err != nil {
+ return p, err
+ }
p++
case *endMarkerNode:
- n.pos = newSymbolPosition(p, true)
+ n.pos, err = newSymbolPosition(p, true)
+ if err != nil {
+ return p, err
+ }
p++
}
- return p
+ return p, nil
}
func printAST(w io.Writer, ast astNode, ruledLine string, childRuledLinePrefix string, withAttrs bool) {
diff --git a/compiler/ast_test.go b/compiler/ast_test.go
index 7ea59e2..8b0a0ee 100644
--- a/compiler/ast_test.go
+++ b/compiler/ast_test.go
@@ -5,6 +5,79 @@ import (
"testing"
)
+func TestNewSymbolPosition(t *testing.T) {
+ tests := []struct {
+ n uint16
+ endMark bool
+ err bool
+ }{
+ {
+ n: 0,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: 0,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: true,
+ err: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
+ pos, err := newSymbolPosition(tt.n, tt.endMark)
+ if tt.err {
+ if err == nil {
+ t.Fatal("err is nil")
+ }
+ return
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, endMark := pos.describe()
+ if n != tt.n || endMark != tt.endMark {
+ t.Errorf("unexpected symbol position; want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
+ }
+ })
+ }
+}
+
func TestASTNode(t *testing.T) {
tests := []struct {
root astNode
diff --git a/compiler/dfa_test.go b/compiler/dfa_test.go
index 26ee189..dcf1ae5 100644
--- a/compiler/dfa_test.go
+++ b/compiler/dfa_test.go
@@ -16,12 +16,20 @@ func TestGenDFA(t *testing.T) {
t.Fatalf("DFA is nil")
}
- symPos := func(n uint8) symbolPosition {
- return newSymbolPosition(n, false)
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
}
- endPos := func(n uint8) symbolPosition {
- return newSymbolPosition(n, true)
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
}
s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
diff --git a/compiler/parser.go b/compiler/parser.go
index 623a174..8658231 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -74,7 +74,10 @@ func parse(regexps map[int][]byte) (astNode, *symbolTable, error) {
root = newAltNode(root, n)
}
}
- positionSymbols(root, 1)
+ _, err := positionSymbols(root, 1)
+ if err != nil {
+ return nil, nil, err
+ }
return root, genSymbolTable(root), nil
}
diff --git a/compiler/parser_test.go b/compiler/parser_test.go
index fd44b5d..c56a82a 100644
--- a/compiler/parser_test.go
+++ b/compiler/parser_test.go
@@ -8,12 +8,20 @@ import (
)
func TestParse(t *testing.T) {
- symPos := func(n uint8) symbolPosition {
- return newSymbolPosition(n, false)
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
}
- endPos := func(n uint8) symbolPosition {
- return newSymbolPosition(n, true)
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
}
tests := []struct {