aboutsummaryrefslogtreecommitdiff
path: root/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'compiler')
-rw-r--r--compiler/ast.go38
-rw-r--r--compiler/compiler.go24
-rw-r--r--compiler/dfa_test.go2
-rw-r--r--compiler/lexer.go74
-rw-r--r--compiler/lexer_test.go28
-rw-r--r--compiler/parser.go187
-rw-r--r--compiler/parser_test.go134
-rw-r--r--compiler/syntax_error.go2
8 files changed, 440 insertions, 49 deletions
diff --git a/compiler/ast.go b/compiler/ast.go
index a419f98..7d3965a 100644
--- a/compiler/ast.go
+++ b/compiler/ast.go
@@ -19,6 +19,7 @@ var (
_ astNode = &concatNode{}
_ astNode = &altNode{}
_ astNode = &optionNode{}
+ _ astNode = &fragmentNode{}
)
type symbolNode struct {
@@ -306,6 +307,38 @@ func (n *optionNode) last() *symbolPositionSet {
return n.lastMemo
}
+type fragmentNode struct {
+ symbol string
+ left astNode
+}
+
+func newFragmentNode(symbol string, ast astNode) *fragmentNode {
+ return &fragmentNode{
+ symbol: symbol,
+ left: ast,
+ }
+}
+
+func (n *fragmentNode) String() string {
+ return fmt.Sprintf("{type: fragment, symbol: %v}", n.symbol)
+}
+
+func (n *fragmentNode) children() (astNode, astNode) {
+ return n.left, nil
+}
+
+func (n *fragmentNode) nullable() bool {
+ return n.left.nullable()
+}
+
+func (n *fragmentNode) first() *symbolPositionSet {
+ return n.left.first()
+}
+
+func (n *fragmentNode) last() *symbolPositionSet {
+ return n.left.last()
+}
+
func copyAST(src astNode) astNode {
switch n := src.(type) {
case *symbolNode:
@@ -320,6 +353,11 @@ func copyAST(src astNode) astNode {
return newRepeatNode(copyAST(n.left))
case *optionNode:
return newOptionNode(copyAST(n.left))
+ case *fragmentNode:
+ if n.left == nil {
+ return newFragmentNode(n.symbol, nil)
+ }
+ return newFragmentNode(n.symbol, copyAST(n.left))
}
panic(fmt.Errorf("copyAST cannot handle %T type; AST: %v", src, src))
}
diff --git a/compiler/compiler.go b/compiler/compiler.go
index 2fdbce1..f382d16 100644
--- a/compiler/compiler.go
+++ b/compiler/compiler.go
@@ -54,7 +54,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
}
}
- modeEntries, modes, modeNums := groupEntriesByLexMode(lexspec.Entries)
+ modeEntries, modes, modeNums, fragmetns := groupEntriesByLexMode(lexspec.Entries)
modeSpecs := []*spec.CompiledLexModeSpec{
nil,
@@ -62,7 +62,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
for i, es := range modeEntries[1:] {
modeName := modes[i+1]
config.logger.Log("Compile %v mode:", modeName)
- modeSpec, err := compile(es, modeNums, config)
+ modeSpec, err := compile(es, modeNums, fragmetns, config)
if err != nil {
return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err)
}
@@ -77,7 +77,7 @@ func Compile(lexspec *spec.LexSpec, opts ...CompilerOption) (*spec.CompiledLexSp
}, nil
}
-func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeNum) {
+func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeNum, map[string]*spec.LexEntry) {
modes := []spec.LexModeName{
spec.LexModeNameNil,
spec.LexModeNameDefault,
@@ -89,9 +89,14 @@ func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec
lastModeNum := spec.LexModeNumDefault
modeEntries := [][]*spec.LexEntry{
nil,
- []*spec.LexEntry{},
+ {},
}
+ fragments := map[string]*spec.LexEntry{}
for _, e := range entries {
+ if e.Fragment {
+ fragments[e.Kind.String()] = e
+ continue
+ }
ms := e.Modes
if len(ms) == 0 {
ms = []spec.LexModeName{
@@ -110,10 +115,10 @@ func groupEntriesByLexMode(entries []*spec.LexEntry) ([][]*spec.LexEntry, []spec
modeEntries[num] = append(modeEntries[num], e)
}
}
- return modeEntries, modes, modeNums
+ return modeEntries, modes, modeNums, fragments
}
-func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexModeNum, config *compilerConfig) (*spec.CompiledLexModeSpec, error) {
+func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexModeNum, fragments map[string]*spec.LexEntry, config *compilerConfig) (*spec.CompiledLexModeSpec, error) {
var kinds []spec.LexKind
var patterns map[int][]byte
{
@@ -149,11 +154,16 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
pop = append(pop, popV)
}
+ fragmentPatterns := map[string][]byte{}
+ for k, e := range fragments {
+ fragmentPatterns[k] = []byte(e.Pattern)
+ }
+
var root astNode
var symTab *symbolTable
{
var err error
- root, symTab, err = parse(patterns)
+ root, symTab, err = parse(patterns, fragmentPatterns)
if err != nil {
return nil, err
}
diff --git a/compiler/dfa_test.go b/compiler/dfa_test.go
index dcf1ae5..a797313 100644
--- a/compiler/dfa_test.go
+++ b/compiler/dfa_test.go
@@ -7,7 +7,7 @@ import (
func TestGenDFA(t *testing.T) {
root, symTab, err := parse(map[int][]byte{
1: []byte("(a|b)*abb"),
- })
+ }, nil)
if err != nil {
t.Fatal(err)
}
diff --git a/compiler/lexer.go b/compiler/lexer.go
index 1aadf50..4f6fdac 100644
--- a/compiler/lexer.go
+++ b/compiler/lexer.go
@@ -24,19 +24,22 @@ const (
tokenKindCharRange = tokenKind("-")
tokenKindCodePointLeader = tokenKind("\\u")
tokenKindCharPropLeader = tokenKind("\\p")
+ tokenKindFragmentLeader = tokenKind("\\f")
tokenKindLBrace = tokenKind("{")
tokenKindRBrace = tokenKind("}")
tokenKindEqual = tokenKind("=")
tokenKindCodePoint = tokenKind("code point")
tokenKindCharPropSymbol = tokenKind("character property symbol")
+ tokenKindFragmentSymbol = tokenKind("fragment symbol")
tokenKindEOF = tokenKind("eof")
)
type token struct {
- kind tokenKind
- char rune
- propSymbol string
- codePoint string
+ kind tokenKind
+ char rune
+ propSymbol string
+ codePoint string
+ fragmentSymbol string
}
const nullChar = '\u0000'
@@ -62,6 +65,13 @@ func newCharPropSymbolToken(propSymbol string) *token {
}
}
+func newFragmentSymbolToken(fragmentSymbol string) *token {
+ return &token{
+ kind: tokenKindFragmentSymbol,
+ fragmentSymbol: fragmentSymbol,
+ }
+}
+
type lexerMode string
const (
@@ -69,6 +79,7 @@ const (
lexerModeBExp = lexerMode("bracket expression")
lexerModeCPExp = lexerMode("code point expression")
lexerModeCharPropExp = lexerMode("character property expression")
+ lexerModeFragmentExp = lexerMode("fragment expression")
)
type lexerModeStack struct {
@@ -197,6 +208,16 @@ func (l *lexer) next() (*token, error) {
l.modeStack.pop()
}
return tok, nil
+ case lexerModeFragmentExp:
+ tok, err := l.nextInFragment(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindRBrace:
+ l.modeStack.pop()
+ }
+ return tok, nil
default:
tok, err := l.nextInDefault(c)
if err != nil {
@@ -213,6 +234,8 @@ func (l *lexer) next() (*token, error) {
l.modeStack.push(lexerModeCPExp)
case tokenKindCharPropLeader:
l.modeStack.push(lexerModeCharPropExp)
+ case tokenKindFragmentLeader:
+ l.modeStack.push(lexerModeFragmentExp)
}
return tok, nil
}
@@ -294,6 +317,9 @@ func (l *lexer) nextInDefault(c rune) (*token, error) {
if c == 'p' {
return newToken(tokenKindCharPropLeader, nullChar), nil
}
+ if c == 'f' {
+ return newToken(tokenKindFragmentLeader, nullChar), nil
+ }
if c == '\\' || c == '.' || c == '*' || c == '+' || c == '?' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']' {
return newToken(tokenKindChar, c), nil
}
@@ -455,6 +481,46 @@ func (l *lexer) nextInCharProp(c rune) (*token, error) {
}
}
+func (l *lexer) nextInFragment(c rune) (*token, error) {
+ switch c {
+ case '{':
+ return newToken(tokenKindLBrace, nullChar), nil
+ case '}':
+ return newToken(tokenKindRBrace, nullChar), nil
+ default:
+ var b strings.Builder
+ fmt.Fprint(&b, string(c))
+ n := 1
+ for {
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ if c == '}' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ fmt.Fprint(&b, string(c))
+ n++
+ }
+ sym := strings.TrimSpace(b.String())
+ if len(sym) == 0 {
+ raiseSyntaxError(SynErrFragmentInvalidSymbol)
+ }
+ return newFragmentSymbolToken(sym), nil
+ }
+}
+
func (l *lexer) read() (rune, bool, error) {
if l.reachedEOF {
return l.lastChar, l.reachedEOF, nil
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go
index 089253b..51eb82e 100644
--- a/compiler/lexer_test.go
+++ b/compiler/lexer_test.go
@@ -445,6 +445,34 @@ func TestLexer(t *testing.T) {
newToken(tokenKindEOF, nullChar),
},
},
+ {
+ caption: "lexer can recognize the special characters and symbols in fragment expression mode",
+ src: "\\f{integer}",
+ tokens: []*token{
+ newToken(tokenKindFragmentLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newFragmentSymbolToken("integer"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a fragment expression is not supported in a bracket expression",
+ src: "[\\f",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "a fragment expression is not supported in an inverse bracket expression",
+ src: "[^\\f",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
}
for _, tt := range tests {
t.Run(tt.caption, func(t *testing.T) {
diff --git a/compiler/parser.go b/compiler/parser.go
index 5b8c912..06a762d 100644
--- a/compiler/parser.go
+++ b/compiler/parser.go
@@ -76,62 +76,173 @@ func genSymTab(symTab *symbolTable, node astNode) *symbolTable {
return symTab
}
-func parse(regexps map[int][]byte) (astNode, *symbolTable, error) {
+func parse(regexps map[int][]byte, fragments map[string][]byte) (astNode, *symbolTable, error) {
if len(regexps) == 0 {
return nil, nil, fmt.Errorf("parse() needs at least one token entry")
}
+
+ fragmentASTs, err := parseFragments(fragments)
+ if err != nil {
+ return nil, nil, err
+ }
+ if fragmentASTs == nil {
+ fragmentASTs = map[string]astNode{}
+ }
+
+ root, err := parseRegexp(regexps, fragmentASTs)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return root, genSymbolTable(root), nil
+}
+
+type incompleteFragment struct {
+ kind string
+ ast astNode
+}
+
+func parseFragments(fragments map[string][]byte) (map[string]astNode, error) {
+ if len(fragments) == 0 {
+ return nil, nil
+ }
+ fragmentASTs := map[string]astNode{}
+ incompleteFragments := []*incompleteFragment{}
+ var perrs []*ParseError
+ for kind, pattern := range fragments {
+ p := newParser(bytes.NewReader(pattern))
+ ast, err := p.parse()
+ if err != nil {
+ perrs = append(perrs, &ParseError{
+ Pattern: pattern,
+ Cause: err,
+ Details: p.errMsgDetails,
+ })
+ continue
+ }
+ if p.incomplete {
+ incompleteFragments = append(incompleteFragments, &incompleteFragment{
+ kind: kind,
+ ast: ast,
+ })
+ } else {
+ fragmentASTs[kind] = ast
+ }
+ }
+ for len(incompleteFragments) > 0 {
+ lastIncompCount := len(incompleteFragments)
+ remainingFragments := []*incompleteFragment{}
+ for _, e := range incompleteFragments {
+ remains := applyFragments(e.ast, fragmentASTs)
+ if len(remains) > 0 {
+ remainingFragments = append(remainingFragments, e)
+ } else {
+ fragmentASTs[e.kind] = e.ast
+ }
+ }
+ incompleteFragments = remainingFragments
+ if len(incompleteFragments) == lastIncompCount {
+ for _, e := range incompleteFragments {
+ perrs = append(perrs, &ParseError{
+ Cause: fmt.Errorf("%v has an undefined fragment or a cycle", e.kind),
+ })
+ }
+ break
+ }
+ }
+ if len(perrs) > 0 {
+ return nil, &ParseErrors{
+ Errors: perrs,
+ }
+ }
+
+ return fragmentASTs, nil
+}
+
+func parseRegexp(regexps map[int][]byte, fragmentASTs map[string]astNode) (astNode, error) {
symPos := symbolPositionMin
var root astNode
var perrs []*ParseError
for id, pattern := range regexps {
- p := newParser(id, symPos, bytes.NewReader(pattern))
+ p := newParser(bytes.NewReader(pattern))
ast, err := p.parse()
if err != nil {
- perr := &ParseError{
+ perrs = append(perrs, &ParseError{
ID: id,
Pattern: pattern,
Cause: err,
Details: p.errMsgDetails,
- }
- if p.errMsgDetails != "" {
- perr.Details = p.errMsgDetails
- }
- perrs = append(perrs, perr)
+ })
continue
}
- if root == nil {
- root = ast
- } else {
- root = newAltNode(root, ast)
+ remains := applyFragments(ast, fragmentASTs)
+ if len(remains) > 0 {
+ perrs = append(perrs, &ParseError{
+ ID: id,
+ Pattern: pattern,
+ Cause: fmt.Errorf("undefined fragment: %+v", remains),
+ })
+ continue
}
- symPos = p.symPos
+ ast = newConcatNode(ast, newEndMarkerNode(id))
+ symPos, err = positionSymbols(ast, symPos)
+ if err != nil {
+ perrs = append(perrs, &ParseError{
+ ID: id,
+ Pattern: pattern,
+ Cause: err,
+ Details: p.errMsgDetails,
+ })
+ continue
+ }
+ root = genAltNode(root, ast)
}
if len(perrs) > 0 {
- return nil, nil, &ParseErrors{
+ return nil, &ParseErrors{
Errors: perrs,
}
}
- return root, genSymbolTable(root), nil
+ return root, nil
+}
+
+func applyFragments(ast astNode, fragments map[string]astNode) []string {
+ if ast == nil {
+ return nil
+ }
+ n, ok := ast.(*fragmentNode)
+ if !ok {
+ var remains []string
+ left, right := ast.children()
+ r := applyFragments(left, fragments)
+ if len(r) > 0 {
+ remains = append(remains, r...)
+ }
+ r = applyFragments(right, fragments)
+ if len(r) > 0 {
+ remains = append(remains, r...)
+ }
+ return remains
+ }
+ f, ok := fragments[n.symbol]
+ if !ok {
+ return []string{n.symbol}
+ }
+ n.left = copyAST(f)
+ return nil
}
type parser struct {
- id int
lex *lexer
peekedTok *token
lastTok *token
+ incomplete bool
errMsgDetails string
- symPos uint16
}
-func newParser(id int, initialSymPos uint16, src io.Reader) *parser {
+func newParser(src io.Reader) *parser {
return &parser{
- id: id,
- lex: newLexer(src),
- peekedTok: nil,
- lastTok: nil,
- errMsgDetails: "",
- symPos: initialSymPos,
+ lex: newLexer(src),
}
}
@@ -152,10 +263,7 @@ func (p *parser) parse() (ast astNode, retErr error) {
if err != nil {
return nil, err
}
- p.symPos, err = positionSymbols(ast, p.symPos)
- if err != nil {
- return nil, err
- }
+
return ast, nil
}
@@ -171,7 +279,7 @@ func (p *parser) parseRegexp() (astNode, error) {
raiseSyntaxError(synErrGroupNoInitiator)
}
p.expect(tokenKindEOF)
- return newConcatNode(alt, newEndMarkerNode(p.id)), nil
+ return alt, nil
}
func (p *parser) parseAlt() astNode {
@@ -315,6 +423,9 @@ func (p *parser) parseSingleChar() astNode {
if p.consume(tokenKindCharPropLeader) {
return p.parseCharProp()
}
+ if p.consume(tokenKindFragmentLeader) {
+ return p.parseFragment()
+ }
c := p.parseNormalChar()
if c == nil {
if p.consume(tokenKindBExpClose) {
@@ -446,6 +557,24 @@ func (p *parser) parseCharProp() astNode {
return alt
}
+func (p *parser) parseFragment() astNode {
+ if !p.consume(tokenKindLBrace) {
+ raiseSyntaxError(synErrFragmentExpInvalidForm)
+ }
+ if !p.consume(tokenKindFragmentSymbol) {
+ raiseSyntaxError(synErrFragmentExpInvalidForm)
+ }
+ sym := p.lastTok.fragmentSymbol
+
+ if !p.consume(tokenKindRBrace) {
+ raiseSyntaxError(synErrFragmentExpInvalidForm)
+ }
+
+ p.incomplete = true
+
+ return newFragmentNode(sym, nil)
+}
+
func (p *parser) parseNormalChar() astNode {
if !p.consume(tokenKindChar) {
return nil
diff --git a/compiler/parser_test.go b/compiler/parser_test.go
index aca08ec..3394845 100644
--- a/compiler/parser_test.go
+++ b/compiler/parser_test.go
@@ -1,7 +1,6 @@
package compiler
import (
- "bytes"
"fmt"
"reflect"
"testing"
@@ -23,9 +22,10 @@ func endPos(n uint16) symbolPosition {
return pos
}
-func TestParser_parse(t *testing.T) {
+func TestParse(t *testing.T) {
tests := []struct {
pattern string
+ fragments map[string]string
ast astNode
syntaxError *SyntaxError
@@ -92,6 +92,24 @@ func TestParser_parse(t *testing.T) {
skipTestAST: true,
},
{
+ pattern: "\\f{a2c}?",
+ fragments: map[string]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newOptionNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
pattern: "(a)?",
ast: genConcatNode(
newOptionNode(
@@ -198,6 +216,24 @@ func TestParser_parse(t *testing.T) {
skipTestAST: true,
},
{
+ pattern: "\\f{a2c}*",
+ fragments: map[string]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newRepeatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
pattern: "((a*)*)*",
ast: genConcatNode(
newRepeatNode(
@@ -306,6 +342,31 @@ func TestParser_parse(t *testing.T) {
skipTestAST: true,
},
{
+ pattern: "\\f{a2c}+",
+ fragments: map[string]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ newRepeatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(4)),
+ newSymbolNodeWithPos(byte('b'), symPos(5)),
+ newSymbolNodeWithPos(byte('c'), symPos(6)),
+ ),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(7)),
+ ),
+ },
+ {
pattern: "((a+)+)+",
ast: genConcatNode(
genConcatNode(
@@ -917,6 +978,53 @@ func TestParser_parse(t *testing.T) {
syntaxError: synErrCharPropExpInvalidForm,
},
{
+ pattern: "\\f{a2c}",
+ fragments: map[string]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ pattern: "\\f{ a2c }",
+ fragments: map[string]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNodeWithPos(byte('a'), symPos(1)),
+ newSymbolNodeWithPos(byte('b'), symPos(2)),
+ newSymbolNodeWithPos(byte('c'), symPos(3)),
+ ),
+ ),
+ newEndMarkerNodeWithPos(1, endPos(4)),
+ ),
+ },
+ {
+ pattern: "\\f",
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "\\f{",
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "\\f{a2c",
+ fragments: map[string]string{
+ "a2c": "abc",
+ },
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
pattern: "(a)",
ast: newConcatNode(
newSymbolNodeWithPos(byte('a'), symPos(1)),
@@ -1085,16 +1193,26 @@ func TestParser_parse(t *testing.T) {
}
for i, tt := range tests {
t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) {
- p := newParser(1, symbolPositionMin, bytes.NewReader([]byte(tt.pattern)))
- ast, err := p.parse()
+ fragments := map[string][]byte{}
+ for kind, pattern := range tt.fragments {
+ fragments[kind] = []byte(pattern)
+ }
+ ast, _, err := parse(map[int][]byte{
+ 1: []byte(tt.pattern),
+ }, fragments)
if tt.syntaxError != nil {
// printAST(os.Stdout, ast, "", "", false)
if err == nil {
t.Fatalf("expected syntax error; got: nil")
}
- synErr, ok := err.(*SyntaxError)
+ parseErrs, ok := err.(*ParseErrors)
+ if !ok {
+ t.Fatalf("expected ParseErrors; got: %v (type: %T)", err, err)
+ }
+ parseErr := parseErrs.Errors[0].Cause
+ synErr, ok := parseErr.(*SyntaxError)
if !ok {
- t.Fatalf("expected SyntaxError; got: %v (type: %T)", err, err)
+ t.Fatalf("expected SyntaxError; got: %v (type: %T)", parseErr, parseErr)
}
if synErr != tt.syntaxError {
t.Fatalf("unexpected syntax error; want: %v, got: %v", tt.syntaxError, synErr)
@@ -1118,10 +1236,10 @@ func TestParser_parse(t *testing.T) {
}
}
-func TestParse(t *testing.T) {
+func TestParse_FollowAndSymbolTable(t *testing.T) {
root, symTab, err := parse(map[int][]byte{
1: []byte("(a|b)*abb"),
- })
+ }, nil)
if err != nil {
t.Fatal(err)
}
diff --git a/compiler/syntax_error.go b/compiler/syntax_error.go
index d784995..3925259 100644
--- a/compiler/syntax_error.go
+++ b/compiler/syntax_error.go
@@ -22,6 +22,7 @@ var (
synErrInvalidEscSeq = newSyntaxError("invalid escape sequence")
synErrInvalidCodePoint = newSyntaxError("code points must consist of just 4 or 6 hex digits")
synErrCharPropInvalidSymbol = newSyntaxError("invalid character property symbol")
+ SynErrFragmentInvalidSymbol = newSyntaxError("invalid fragment symbol")
// syntax errors
synErrUnexpectedToken = newSyntaxError("unexpected token")
@@ -40,4 +41,5 @@ var (
synErrCPExpOutOfRange = newSyntaxError("a code point must be between U+0000 to U+10FFFF")
synErrCharPropExpInvalidForm = newSyntaxError("invalid character property expression")
synErrCharPropUnsupported = newSyntaxError("unsupported character property")
+ synErrFragmentExpInvalidForm = newSyntaxError("invalid fragment expression")
)