Diffstat (limited to 'grammar')
-rw-r--r--  grammar/first.go | 22
-rw-r--r--  grammar/first_test.go | 13
-rw-r--r--  grammar/grammar.go | 334
-rw-r--r--  grammar/grammar_test.go | 456
-rw-r--r--  grammar/item.go | 12
-rw-r--r--  grammar/lalr1.go | 32
-rw-r--r--  grammar/lalr1_test.go | 50
-rw-r--r--  grammar/lexical/compiler.go | 413
-rw-r--r--  grammar/lexical/compiler_test.go | 338
-rw-r--r--  grammar/lexical/dfa/dfa.go | 173
-rw-r--r--  grammar/lexical/dfa/dfa_test.go | 121
-rw-r--r--  grammar/lexical/dfa/symbol_position.go | 182
-rw-r--r--  grammar/lexical/dfa/symbol_position_test.go | 79
-rw-r--r--  grammar/lexical/dfa/tree.go | 567
-rw-r--r--  grammar/lexical/dfa/tree_test.go | 257
-rw-r--r--  grammar/lexical/entry.go | 171
-rw-r--r--  grammar/lexical/parser/error.go | 36
-rw-r--r--  grammar/lexical/parser/fragment.go | 72
-rw-r--r--  grammar/lexical/parser/lexer.go | 594
-rw-r--r--  grammar/lexical/parser/lexer_test.go | 524
-rw-r--r--  grammar/lexical/parser/parser.go | 531
-rw-r--r--  grammar/lexical/parser/parser_test.go | 1389
-rw-r--r--  grammar/lexical/parser/tree.go | 459
-rw-r--r--  grammar/lr0.go | 20
-rw-r--r--  grammar/lr0_test.go | 48
-rw-r--r--  grammar/parsing_table.go | 95
-rw-r--r--  grammar/parsing_table_test.go | 97
-rw-r--r--  grammar/production.go | 28
-rw-r--r--  grammar/semantic_error.go | 62
-rw-r--r--  grammar/symbol/symbol.go (renamed from grammar/symbol.go) | 136
-rw-r--r--  grammar/symbol/symbol_test.go (renamed from grammar/symbol_test.go) | 50
-rw-r--r--  grammar/test_helper_test.go | 20
32 files changed, 6652 insertions, 729 deletions
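
In short: this commit replaces the external maleeni lexer (the mlcompiler and mlspec imports) with the new in-tree grammar/lexical and grammar/symbol packages, exports the symbol-table API (ToSymbol, ToText, Num, and so on), and folds the previously exported Compile function into GrammarBuilder.Build, renaming CompileOption to BuildOption along the way.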
diff --git a/grammar/first.go b/grammar/first.go
index 72de282..923f8ed 100644
--- a/grammar/first.go
+++ b/grammar/first.go
@@ -1,20 +1,24 @@
package grammar
-import "fmt"
+import (
+ "fmt"
+
+ "github.com/nihei9/vartan/grammar/symbol"
+)
type firstEntry struct {
- symbols map[symbol]struct{}
+ symbols map[symbol.Symbol]struct{}
empty bool
}
func newFirstEntry() *firstEntry {
return &firstEntry{
- symbols: map[symbol]struct{}{},
+ symbols: map[symbol.Symbol]struct{}{},
empty: false,
}
}
-func (e *firstEntry) add(sym symbol) bool {
+func (e *firstEntry) add(sym symbol.Symbol) bool {
if _, ok := e.symbols[sym]; ok {
return false
}
@@ -45,12 +49,12 @@ func (e *firstEntry) mergeExceptEmpty(target *firstEntry) bool {
}
type firstSet struct {
- set map[symbol]*firstEntry
+ set map[symbol.Symbol]*firstEntry
}
func newFirstSet(prods *productionSet) *firstSet {
fst := &firstSet{
- set: map[symbol]*firstEntry{},
+ set: map[symbol.Symbol]*firstEntry{},
}
for _, prod := range prods.getAllProductions() {
if _, ok := fst.set[prod.lhs]; ok {
@@ -69,7 +73,7 @@ func (fst *firstSet) find(prod *production, head int) (*firstEntry, error) {
return entry, nil
}
for _, sym := range prod.rhs[head:] {
- if sym.isTerminal() {
+ if sym.IsTerminal() {
entry.add(sym)
return entry, nil
}
@@ -89,7 +93,7 @@ func (fst *firstSet) find(prod *production, head int) (*firstEntry, error) {
return entry, nil
}
-func (fst *firstSet) findBySymbol(sym symbol) *firstEntry {
+func (fst *firstSet) findBySymbol(sym symbol.Symbol) *firstEntry {
return fst.set[sym]
}
@@ -130,7 +134,7 @@ func genProdFirstEntry(cc *firstComContext, acc *firstEntry, prod *production) (
}
for _, sym := range prod.rhs {
- if sym.isTerminal() {
+ if sym.IsTerminal() {
return acc.add(sym), nil
}
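
For orientation, the find method touched above walks a production's RHS left to right: a terminal ends the walk immediately, a non-terminal contributes its FIRST set minus ε, and the walk continues only while symbols can derive ε. A self-contained sketch of that loop, using toy stand-in types rather than the package's actual API:

package main

import "fmt"

// tok is a toy stand-in for symbol.Symbol in this sketch.
type tok struct {
	name     string
	terminal bool
}

// firstOfRHS mirrors the loop in firstSet.find: collect the FIRST set of a
// production's RHS, stopping at the first symbol that cannot derive ε.
func firstOfRHS(rhs []tok, firstOf func(tok) (map[string]struct{}, bool)) (map[string]struct{}, bool) {
	acc := map[string]struct{}{}
	for _, s := range rhs {
		if s.terminal {
			acc[s.name] = struct{}{} // a terminal ends the walk
			return acc, false
		}
		set, empty := firstOf(s)
		for t := range set {
			acc[t] = struct{}{} // merge FIRST(s) except ε
		}
		if !empty {
			return acc, false // s cannot derive ε; later symbols are unreachable
		}
	}
	return acc, true // every RHS symbol can derive ε, so ε is in FIRST
}

func main() {
	// For a → B x with B → ε | b, FIRST(a) = {b, x}.
	firstOfB := func(tok) (map[string]struct{}, bool) {
		return map[string]struct{}{"b": {}}, true
	}
	set, empty := firstOfRHS([]tok{{"B", false}, {"x", true}}, firstOfB)
	fmt.Println(set, empty) // map[b:{} x:{}] false
}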
diff --git a/grammar/first_test.go b/grammar/first_test.go
index 21ee4df..1eff309 100644
--- a/grammar/first_test.go
+++ b/grammar/first_test.go
@@ -4,7 +4,8 @@ import (
"strings"
"testing"
- spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/grammar/symbol"
+ "github.com/nihei9/vartan/spec/grammar/parser"
)
type first struct {
@@ -137,7 +138,7 @@ bar: "bar";
fst, gram := genActualFirst(t, tt.src)
for _, ttFirst := range tt.first {
- lhsSym, ok := gram.symbolTable.toSymbol(ttFirst.lhs)
+ lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
if !ok {
t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
}
@@ -161,14 +162,14 @@ bar: "bar";
}
func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
- ast, err := spec.Parse(strings.NewReader(src))
+ ast, err := parser.Parse(strings.NewReader(src))
if err != nil {
t.Fatal(err)
}
b := GrammarBuilder{
AST: ast,
}
- gram, err := b.Build()
+ gram, err := b.build()
if err != nil {
t.Fatal(err)
}
@@ -183,7 +184,7 @@ func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
return fst, gram
}
-func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbolTableReader) *firstEntry {
+func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
t.Helper()
entry := newFirstEntry()
@@ -191,7 +192,7 @@ func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *s
entry.addEmpty()
}
for _, sym := range symbols {
- symSym, ok := symTab.toSymbol(sym)
+ symSym, ok := symTab.ToSymbol(sym)
if !ok {
t.Fatalf("a symbol was not found; symbol: %v", sym)
}
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 50272e0..1e05289 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -5,10 +5,11 @@ import (
"io"
"strings"
- mlcompiler "github.com/nihei9/maleeni/compiler"
- mlspec "github.com/nihei9/maleeni/spec"
verr "github.com/nihei9/vartan/error"
+ "github.com/nihei9/vartan/grammar/lexical"
+ "github.com/nihei9/vartan/grammar/symbol"
spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/spec/grammar/parser"
)
type astActionEntry struct {
@@ -33,8 +34,8 @@ const (
// We use the priority of the production to resolve shift/reduce conflicts.
type precAndAssoc struct {
// termPrec and termAssoc represent the precedence and associativity of the terminal symbols.
- termPrec map[symbolNum]int
- termAssoc map[symbolNum]assocType
+ termPrec map[symbol.SymbolNum]int
+ termAssoc map[symbol.SymbolNum]assocType
// prodPrec and prodAssoc represent the precedence and associativity of each production.
// These values are inherited from the right-most terminal symbol in the RHS of each production.
@@ -42,7 +43,7 @@ type precAndAssoc struct {
prodAssoc map[productionNum]assocType
}
-func (pa *precAndAssoc) terminalPrecedence(sym symbolNum) int {
+func (pa *precAndAssoc) terminalPrecedence(sym symbol.SymbolNum) int {
prec, ok := pa.termPrec[sym]
if !ok {
return precNil
@@ -51,7 +52,7 @@ func (pa *precAndAssoc) terminalPrecedence(sym symbolNum) int {
return prec
}
-func (pa *precAndAssoc) terminalAssociativity(sym symbolNum) assocType {
+func (pa *precAndAssoc) terminalAssociativity(sym symbol.SymbolNum) assocType {
assoc, ok := pa.termAssoc[sym]
if !ok {
return assocTypeNil
@@ -82,12 +83,12 @@ const reservedSymbolNameError = "error"
type Grammar struct {
name string
- lexSpec *mlspec.LexSpec
- skipLexKinds []mlspec.LexKindName
+ lexSpec *lexical.LexSpec
+ skipSymbols []symbol.Symbol
productionSet *productionSet
- augmentedStartSymbol symbol
- errorSymbol symbol
- symbolTable *symbolTableReader
+ augmentedStartSymbol symbol.Symbol
+ errorSymbol symbol.Symbol
+ symbolTable *symbol.SymbolTableReader
astActions map[productionID][]*astActionEntry
precAndAssoc *precAndAssoc
@@ -95,13 +96,34 @@ type Grammar struct {
recoverProductions map[productionID]struct{}
}
+type buildConfig struct {
+ isReportingEnabled bool
+}
+
+type BuildOption func(config *buildConfig)
+
+func EnableReporting() BuildOption {
+ return func(config *buildConfig) {
+ config.isReportingEnabled = true
+ }
+}
+
type GrammarBuilder struct {
- AST *spec.RootNode
+ AST *parser.RootNode
errs verr.SpecErrors
}
-func (b *GrammarBuilder) Build() (*Grammar, error) {
+func (b *GrammarBuilder) Build(opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) {
+ gram, err := b.build()
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return compile(gram, opts...)
+}
+
+func (b *GrammarBuilder) build() (*Grammar, error) {
var specName string
{
errOccurred := false
@@ -143,12 +165,12 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
return nil, err
}
- lexSpec, err := b.genLexSpec(b.AST)
+ lexSpec, skip, err := b.genLexSpecAndSkipSymbols(symTab.Reader(), b.AST)
if err != nil {
return nil, err
}
- prodsAndActs, err := b.genProductionsAndActions(b.AST, symTab.reader(), ss.errSym, ss.augStartSym, ss.startSym)
+ prodsAndActs, err := b.genProductionsAndActions(b.AST, symTab.Reader(), ss.errSym, ss.augStartSym, ss.startSym)
if err != nil {
return nil, err
}
@@ -156,7 +178,7 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
return nil, b.errs
}
- pa, err := b.genPrecAndAssoc(symTab.reader(), ss.errSym, prodsAndActs)
+ pa, err := b.genPrecAndAssoc(symTab.Reader(), ss.errSym, prodsAndActs)
if err != nil {
return nil, err
}
@@ -171,20 +193,23 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
// When a terminal symbol that cannot be reached from the start symbol has the skip directive,
// the compiler treats that terminal as a used symbol, not an unused one.
- for _, sym := range lexSpec.skip {
- s := sym.String()
- if _, ok := syms.unusedTerminals[s]; !ok {
- prod := syms.usedTerminals[s]
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrTermCannotBeSkipped,
- Detail: s,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- continue
- }
+ {
+ r := symTab.Reader()
+ for _, sym := range skip {
+ s, _ := r.ToText(sym)
+ if _, ok := syms.unusedTerminals[s]; !ok {
+ prod := syms.usedTerminals[s]
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrTermCannotBeSkipped,
+ Detail: s,
+ Row: prod.Pos.Row,
+ Col: prod.Pos.Col,
+ })
+ continue
+ }
- delete(syms.unusedTerminals, s)
+ delete(syms.unusedTerminals, s)
+ }
}
for sym, prod := range syms.unusedProductions {
@@ -209,16 +234,14 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
return nil, b.errs
}
- lexSpec.lexSpec.Name = specName
-
return &Grammar{
name: specName,
- lexSpec: lexSpec.lexSpec,
- skipLexKinds: lexSpec.skip,
+ lexSpec: lexSpec,
+ skipSymbols: skip,
productionSet: prodsAndActs.prods,
augmentedStartSymbol: prodsAndActs.augStartSym,
errorSymbol: ss.errSym,
- symbolTable: symTab.reader(),
+ symbolTable: symTab.Reader(),
astActions: prodsAndActs.astActs,
recoverProductions: prodsAndActs.recoverProds,
precAndAssoc: pa,
@@ -226,14 +249,14 @@ func (b *GrammarBuilder) Build() (*Grammar, error) {
}
type usedAndUnusedSymbols struct {
- unusedProductions map[string]*spec.ProductionNode
- unusedTerminals map[string]*spec.ProductionNode
- usedTerminals map[string]*spec.ProductionNode
+ unusedProductions map[string]*parser.ProductionNode
+ unusedTerminals map[string]*parser.ProductionNode
+ usedTerminals map[string]*parser.ProductionNode
}
-func findUsedAndUnusedSymbols(root *spec.RootNode) *usedAndUnusedSymbols {
- prods := map[string]*spec.ProductionNode{}
- lexProds := map[string]*spec.ProductionNode{}
+func findUsedAndUnusedSymbols(root *parser.RootNode) *usedAndUnusedSymbols {
+ prods := map[string]*parser.ProductionNode{}
+ lexProds := map[string]*parser.ProductionNode{}
mark := map[string]bool{}
{
for _, p := range root.Productions {
@@ -262,9 +285,9 @@ func findUsedAndUnusedSymbols(root *spec.RootNode) *usedAndUnusedSymbols {
delete(mark, reservedSymbolNameError)
}
- usedTerms := make(map[string]*spec.ProductionNode, len(lexProds))
- unusedProds := map[string]*spec.ProductionNode{}
- unusedTerms := map[string]*spec.ProductionNode{}
+ usedTerms := make(map[string]*parser.ProductionNode, len(lexProds))
+ unusedProds := map[string]*parser.ProductionNode{}
+ unusedTerms := map[string]*parser.ProductionNode{}
for sym, used := range mark {
if p, ok := prods[sym]; ok {
if used {
@@ -294,7 +317,7 @@ func findUsedAndUnusedSymbols(root *spec.RootNode) *usedAndUnusedSymbols {
}
}
-func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[string]*spec.ProductionNode, prod *spec.ProductionNode) {
+func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[string]*parser.ProductionNode, prod *parser.ProductionNode) {
if marked[prod.LHS] {
return
}
@@ -320,7 +343,7 @@ func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[str
}
}
-func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *spec.RootNode) {
+func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *parser.RootNode) {
var ids []string
{
for _, prod := range root.Productions {
@@ -344,7 +367,7 @@ func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *spec
}
}
- duplicated := mlspec.FindSpellingInconsistencies(ids)
+ duplicated := lexical.FindSpellingInconsistencies(ids)
if len(duplicated) == 0 {
return
}
@@ -367,7 +390,7 @@ func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *spec
}
}
-func collectUserDefinedIDsFromDirective(dir *spec.DirectiveNode) []string {
+func collectUserDefinedIDsFromDirective(dir *parser.DirectiveNode) []string {
var ids []string
for _, param := range dir.Parameters {
if param.Group != nil {
@@ -386,20 +409,20 @@ func collectUserDefinedIDsFromDirective(dir *spec.DirectiveNode) []string {
}
type symbols struct {
- errSym symbol
- augStartSym symbol
- startSym symbol
+ errSym symbol.Symbol
+ augStartSym symbol.Symbol
+ startSym symbol.Symbol
}
-func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *symbols, error) {
- symTab := newSymbolTable()
- w := symTab.writer()
- r := symTab.reader()
+func (b *GrammarBuilder) genSymbolTable(root *parser.RootNode) (*symbol.SymbolTable, *symbols, error) {
+ symTab := symbol.NewSymbolTable()
+ w := symTab.Writer()
+ r := symTab.Reader()
// We need to register the reserved symbol before registering others.
- var errSym symbol
+ var errSym symbol.Symbol
{
- sym, err := w.registerTerminalSymbol(reservedSymbolNameError)
+ sym, err := w.RegisterTerminalSymbol(reservedSymbolNameError)
if err != nil {
return nil, nil, err
}
@@ -407,7 +430,7 @@ func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *sym
}
for _, prod := range root.LexProductions {
- if sym, exist := r.toSymbol(prod.LHS); exist {
+ if sym, exist := r.ToSymbol(prod.LHS); exist {
if sym == errSym {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrErrSymIsReserved,
@@ -426,7 +449,7 @@ func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *sym
continue
}
- _, err := w.registerTerminalSymbol(prod.LHS)
+ _, err := w.RegisterTerminalSymbol(prod.LHS)
if err != nil {
return nil, nil, err
}
@@ -435,7 +458,7 @@ func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *sym
startProd := root.Productions[0]
augStartText := fmt.Sprintf("%s'", startProd.LHS)
var err error
- augStartSym, err := w.registerStartSymbol(augStartText)
+ augStartSym, err := w.RegisterStartSymbol(augStartText)
if err != nil {
return nil, nil, err
}
@@ -447,7 +470,7 @@ func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *sym
})
}
- startSym, err := w.registerNonTerminalSymbol(startProd.LHS)
+ startSym, err := w.RegisterNonTerminalSymbol(startProd.LHS)
if err != nil {
return nil, nil, err
}
@@ -460,11 +483,11 @@ func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *sym
}
for _, prod := range root.Productions {
- sym, err := w.registerNonTerminalSymbol(prod.LHS)
+ sym, err := w.RegisterNonTerminalSymbol(prod.LHS)
if err != nil {
return nil, nil, err
}
- if sym.isTerminal() {
+ if sym.IsTerminal() {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDuplicateName,
Detail: prod.LHS,
@@ -488,25 +511,21 @@ func (b *GrammarBuilder) genSymbolTable(root *spec.RootNode) (*symbolTable, *sym
}, nil
}
-type lexSpec struct {
- lexSpec *mlspec.LexSpec
- skip []mlspec.LexKindName
-}
-
-func (b *GrammarBuilder) genLexSpec(root *spec.RootNode) (*lexSpec, error) {
- entries := []*mlspec.LexEntry{}
- skipKinds := []mlspec.LexKindName{}
+func (b *GrammarBuilder) genLexSpecAndSkipSymbols(symTab *symbol.SymbolTableReader, root *parser.RootNode) (*lexical.LexSpec, []symbol.Symbol, error) {
+ entries := []*lexical.LexEntry{}
+ skipSyms := []symbol.Symbol{}
for _, prod := range root.LexProductions {
entry, skip, specErr, err := genLexEntry(prod)
if err != nil {
- return nil, err
+ return nil, nil, err
}
if specErr != nil {
b.errs = append(b.errs, specErr)
continue
}
if skip {
- skipKinds = append(skipKinds, mlspec.LexKindName(prod.LHS))
+ sym, _ := symTab.ToSymbol(prod.LHS)
+ skipSyms = append(skipSyms, sym)
}
entries = append(entries, entry)
}
@@ -524,35 +543,32 @@ func (b *GrammarBuilder) genLexSpec(root *spec.RootNode) (*lexSpec, error) {
}
checkedFragments[fragment.LHS] = struct{}{}
- entries = append(entries, &mlspec.LexEntry{
+ entries = append(entries, &lexical.LexEntry{
Fragment: true,
- Kind: mlspec.LexKindName(fragment.LHS),
- Pattern: mlspec.LexPattern(fragment.RHS),
+ Kind: spec.LexKindName(fragment.LHS),
+ Pattern: fragment.RHS,
})
}
- return &lexSpec{
- lexSpec: &mlspec.LexSpec{
- Entries: entries,
- },
- skip: skipKinds,
- }, nil
+ return &lexical.LexSpec{
+ Entries: entries,
+ }, skipSyms, nil
}
-func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecError, error) {
+func genLexEntry(prod *parser.ProductionNode) (*lexical.LexEntry, bool, *verr.SpecError, error) {
alt := prod.RHS[0]
elem := alt.Elements[0]
var pattern string
if elem.Literally {
- pattern = mlspec.EscapePattern(elem.Pattern)
+ pattern = spec.EscapePattern(elem.Pattern)
} else {
pattern = elem.Pattern
}
- var modes []mlspec.LexModeName
+ var modes []spec.LexModeName
var skip bool
- var push mlspec.LexModeName
+ var push spec.LexModeName
var pop bool
dirConsumed := map[string]struct{}{}
for _, dir := range prod.Directives {
@@ -585,7 +601,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE
Col: param.Pos.Col,
}, nil
}
- modes = append(modes, mlspec.LexModeName(param.ID))
+ modes = append(modes, spec.LexModeName(param.ID))
}
case "skip":
if len(dir.Parameters) > 0 {
@@ -606,7 +622,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE
Col: dir.Pos.Col,
}, nil
}
- push = mlspec.LexModeName(dir.Parameters[0].ID)
+ push = spec.LexModeName(dir.Parameters[0].ID)
case "pop":
if len(dir.Parameters) > 0 {
return nil, false, &verr.SpecError{
@@ -636,10 +652,10 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE
}, nil
}
- return &mlspec.LexEntry{
+ return &lexical.LexEntry{
Modes: modes,
- Kind: mlspec.LexKindName(prod.LHS),
- Pattern: mlspec.LexPattern(pattern),
+ Kind: spec.LexKindName(prod.LHS),
+ Pattern: pattern,
Push: push,
Pop: pop,
}, skip, nil, nil
@@ -647,15 +663,15 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE
type productionsAndActions struct {
prods *productionSet
- augStartSym symbol
+ augStartSym symbol.Symbol
astActs map[productionID][]*astActionEntry
- prodPrecsTerm map[productionID]symbol
+ prodPrecsTerm map[productionID]symbol.Symbol
prodPrecsOrdSym map[productionID]string
- prodPrecPoss map[productionID]*spec.Position
+ prodPrecPoss map[productionID]*parser.Position
recoverProds map[productionID]struct{}
}
-func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *symbolTableReader, errSym symbol, augStartSym symbol, startSym symbol) (*productionsAndActions, error) {
+func (b *GrammarBuilder) genProductionsAndActions(root *parser.RootNode, symTab *symbol.SymbolTableReader, errSym symbol.Symbol, augStartSym symbol.Symbol, startSym symbol.Symbol) (*productionsAndActions, error) {
if len(root.Productions) == 0 {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrNoProduction,
@@ -665,12 +681,12 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
prods := newProductionSet()
astActs := map[productionID][]*astActionEntry{}
- prodPrecsTerm := map[productionID]symbol{}
+ prodPrecsTerm := map[productionID]symbol.Symbol{}
prodPrecsOrdSym := map[productionID]string{}
- prodPrecPoss := map[productionID]*spec.Position{}
+ prodPrecPoss := map[productionID]*parser.Position{}
recoverProds := map[productionID]struct{}{}
- p, err := newProduction(augStartSym, []symbol{
+ p, err := newProduction(augStartSym, []symbol.Symbol{
startSym,
})
if err != nil {
@@ -680,7 +696,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
prods.append(p)
for _, prod := range root.Productions {
- lhsSym, ok := symTab.toSymbol(prod.LHS)
+ lhsSym, ok := symTab.ToSymbol(prod.LHS)
if !ok {
// All symbols are assumed to be pre-detected, so it's a bug if we cannot find them here.
return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS)
@@ -698,11 +714,11 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
LOOP_RHS:
for _, alt := range prod.RHS {
- altSyms := make([]symbol, len(alt.Elements))
+ altSyms := make([]symbol.Symbol, len(alt.Elements))
offsets := map[string]int{}
ambiguousIDOffsets := map[string]struct{}{}
for i, elem := range alt.Elements {
- sym, ok := symTab.toSymbol(elem.ID)
+ sym, ok := symTab.ToSymbol(elem.ID)
if !ok {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrUndefinedSym,
@@ -724,7 +740,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
})
continue LOOP_RHS
}
- if _, found := symTab.toSymbol(elem.Label.Name); found {
+ if _, found := symTab.ToSymbol(elem.Label.Name); found {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrInvalidLabel,
Detail: elem.Label.Name,
@@ -877,12 +893,12 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
})
continue LOOP_RHS
}
- elemSym, ok := symTab.toSymbol(elem.ID)
+ elemSym, ok := symTab.ToSymbol(elem.ID)
if !ok {
// If the symbol was not found, it's a bug.
return nil, fmt.Errorf("a symbol corresponding to an ID (%v) was not found", elem.ID)
}
- if elemSym.isTerminal() {
+ if elemSym.IsTerminal() {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDirInvalidParam,
Detail: fmt.Sprintf("the expansion symbol cannot be applied to a terminal symbol (%v: %v)", param.ID, elem.ID),
@@ -912,7 +928,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
param := dir.Parameters[0]
switch {
case param.ID != "":
- sym, ok := symTab.toSymbol(param.ID)
+ sym, ok := symTab.ToSymbol(param.ID)
if !ok {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDirInvalidParam,
@@ -930,7 +946,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
Col: param.Pos.Col,
})
}
- if !sym.isTerminal() {
+ if !sym.IsTerminal() {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDirInvalidParam,
Detail: fmt.Sprintf("the symbol must be a terminal: %v", param.ID),
@@ -980,12 +996,12 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTab *s
}, nil
}
-func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbol, prodsAndActs *productionsAndActions) (*precAndAssoc, error) {
- termPrec := map[symbolNum]int{}
- termAssoc := map[symbolNum]assocType{}
+func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbol.SymbolTableReader, errSym symbol.Symbol, prodsAndActs *productionsAndActions) (*precAndAssoc, error) {
+ termPrec := map[symbol.SymbolNum]int{}
+ termAssoc := map[symbol.SymbolNum]assocType{}
ordSymPrec := map[string]int{}
{
- var precGroup []*spec.DirectiveNode
+ var precGroup []*parser.DirectiveNode
for _, dir := range b.AST.Directives {
if dir.Name == "prec" {
if dir.Parameters == nil || len(dir.Parameters) != 1 || dir.Parameters[0].Group == nil {
@@ -1045,7 +1061,7 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
for _, p := range dir.Parameters {
switch {
case p.ID != "":
- sym, ok := symTab.toSymbol(p.ID)
+ sym, ok := symTab.ToSymbol(p.ID)
if !ok {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDirInvalidParam,
@@ -1064,7 +1080,7 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
})
return nil, nil
}
- if !sym.isTerminal() {
+ if !sym.IsTerminal() {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDirInvalidParam,
Detail: fmt.Sprintf("associativity can take only terminal symbol ('%v' is a non-terminal)", p.ID),
@@ -1073,7 +1089,7 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
})
return nil, nil
}
- if prec, alreadySet := termPrec[sym.num()]; alreadySet {
+ if prec, alreadySet := termPrec[sym.Num()]; alreadySet {
if prec == precN {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDuplicateAssoc,
@@ -1081,7 +1097,7 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
Row: p.Pos.Row,
Col: p.Pos.Col,
})
- } else if assoc := termAssoc[sym.num()]; assoc == assocTy {
+ } else if assoc := termAssoc[sym.Num()]; assoc == assocTy {
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrDuplicateAssoc,
Detail: fmt.Sprintf("'%v' already has different precedence", p.ID),
@@ -1099,8 +1115,8 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
break ASSOC_PARAM_LOOP
}
- termPrec[sym.num()] = precN
- termAssoc[sym.num()] = assocTy
+ termPrec[sym.Num()] = precN
+ termAssoc[sym.Num()] = assocTy
case p.OrderedSymbol != "":
if prec, alreadySet := ordSymPrec[p.OrderedSymbol]; alreadySet {
if prec == precN {
@@ -1145,11 +1161,11 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
for _, prod := range prodsAndActs.prods.getAllProductions() {
// A #prec directive changes only precedence, not associativity.
if term, ok := prodsAndActs.prodPrecsTerm[prod.id]; ok {
- if prec, ok := termPrec[term.num()]; ok {
+ if prec, ok := termPrec[term.Num()]; ok {
prodPrec[prod.num] = prec
prodAssoc[prod.num] = assocTypeNil
} else {
- text, _ := symTab.toText(term)
+ text, _ := symTab.ToText(term)
b.errs = append(b.errs, &verr.SpecError{
Cause: semErrUndefinedPrec,
Detail: text,
@@ -1171,16 +1187,16 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
}
} else {
// A production inherits precedence and associativity from the right-most terminal symbol.
- mostrightTerm := symbolNil
+ mostrightTerm := symbol.SymbolNil
for _, sym := range prod.rhs {
- if !sym.isTerminal() {
+ if !sym.IsTerminal() {
continue
}
mostrightTerm = sym
}
- if !mostrightTerm.isNil() {
- prodPrec[prod.num] = termPrec[mostrightTerm.num()]
- prodAssoc[prod.num] = termAssoc[mostrightTerm.num()]
+ if !mostrightTerm.IsNil() {
+ prodPrec[prod.num] = termPrec[mostrightTerm.Num()]
+ prodAssoc[prod.num] = termAssoc[mostrightTerm.Num()]
}
}
}
@@ -1196,25 +1212,13 @@ func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbolTableReader, errSym symbo
}, nil
}
-type compileConfig struct {
- isReportingEnabled bool
-}
-
-type CompileOption func(config *compileConfig)
-
-func EnableReporting() CompileOption {
- return func(config *compileConfig) {
- config.isReportingEnabled = true
- }
-}
-
-func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec.Report, error) {
- config := &compileConfig{}
+func compile(gram *Grammar, opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) {
+ config := &buildConfig{}
for _, opt := range opts {
opt(config)
}
- lexSpec, err, cErrs := mlcompiler.Compile(gram.lexSpec, mlcompiler.CompressionLevel(mlcompiler.CompressionLevelMax))
+ lexSpec, err, cErrs := lexical.Compile(gram.lexSpec, lexical.CompressionLevelMax)
if err != nil {
if len(cErrs) > 0 {
var b strings.Builder
@@ -1230,35 +1234,44 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
kind2Term := make([]int, len(lexSpec.KindNames))
for i, k := range lexSpec.KindNames {
- if k == mlspec.LexKindNameNil {
- kind2Term[mlspec.LexKindIDNil] = symbolNil.num().Int()
+ if k == spec.LexKindNameNil {
+ kind2Term[spec.LexKindIDNil] = symbol.SymbolNil.Num().Int()
continue
}
- sym, ok := gram.symbolTable.toSymbol(k.String())
+ sym, ok := gram.symbolTable.ToSymbol(k.String())
if !ok {
return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k)
}
- kind2Term[i] = sym.num().Int()
+ kind2Term[i] = sym.Num().Int()
}
- termTexts, err := gram.symbolTable.terminalTexts()
+ termTexts, err := gram.symbolTable.TerminalTexts()
if err != nil {
return nil, nil, err
}
- termSkip := make([]int, len(termTexts))
- for i, k := range lexSpec.KindNames {
- for _, sk := range gram.skipLexKinds {
- if k != sk {
- continue
+ var termSkip []int
+ {
+ r := gram.symbolTable.Reader()
+ // I want to use gram.symbolTable.terminalSymbols() here instead of gram.symbolTable.terminalTexts(),
+ // but gram.symbolTable.terminalSymbols() differs in length from terminalTexts()
+ // because it does not contain predefined symbols, like EOF.
+ // Therefore, we use terminalTexts, although it takes more time to look up symbols.
+ termSkip = make([]int, len(termTexts))
+ for _, t := range termTexts {
+ s, _ := r.ToSymbol(t)
+ for _, sk := range gram.skipSymbols {
+ if s != sk {
+ continue
+ }
+ termSkip[s.Num()] = 1
+ break
}
- termSkip[kind2Term[i]] = 1
- break
}
}
- nonTerms, err := gram.symbolTable.nonTerminalTexts()
+ nonTerms, err := gram.symbolTable.NonTerminalTexts()
if err != nil {
return nil, nil, err
}
@@ -1316,7 +1329,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1)
astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1)
for _, p := range gram.productionSet.getAllProductions() {
- lhsSyms[p.num] = p.lhs.num().Int()
+ lhsSyms[p.num] = p.lhs.Num().Int()
altSymCounts[p.num] = p.rhsLen
if _, ok := gram.recoverProductions[p.id]; ok {
@@ -1339,15 +1352,9 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
}
return &spec.CompiledGrammar{
- Name: gram.name,
- LexicalSpecification: &spec.LexicalSpecification{
- Lexer: "maleeni",
- Maleeni: &spec.Maleeni{
- Spec: lexSpec,
- KindToTerminal: kind2Term,
- },
- },
- ParsingTable: &spec.ParsingTable{
+ Name: gram.name,
+ Lexical: lexSpec,
+ Syntactic: &spec.SyntacticSpec{
Action: action,
GoTo: goTo,
StateCount: tab.stateCount,
@@ -1358,10 +1365,11 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
Terminals: termTexts,
TerminalCount: tab.terminalCount,
TerminalSkip: termSkip,
+ KindToTerminal: kind2Term,
NonTerminals: nonTerms,
NonTerminalCount: tab.nonTerminalCount,
- EOFSymbol: symbolEOF.num().Int(),
- ErrorSymbol: gram.errorSymbol.num().Int(),
+ EOFSymbol: symbol.SymbolEOF.Num().Int(),
+ ErrorSymbol: gram.errorSymbol.Num().Int(),
ErrorTrapperStates: tab.errorTrapperStates,
RecoverProductions: recoverProds,
},
@@ -1371,7 +1379,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, *spec
}, report, nil
}
-func writeCompileError(w io.Writer, cErr *mlcompiler.CompileError) {
+func writeCompileError(w io.Writer, cErr *lexical.CompileError) {
if cErr.Fragment {
fmt.Fprintf(w, "fragment ")
}
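
Net effect of the grammar.go changes: the exported Compile function is gone, and GrammarBuilder.Build now builds and compiles in one call. A minimal sketch of a caller under the new API (package paths as in the imports above; the tiny grammar source is illustrative):

package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/nihei9/vartan/grammar"
	"github.com/nihei9/vartan/spec/grammar/parser"
)

func main() {
	// An illustrative two-rule grammar; any valid vartan source works here.
	src := `
#name example;

s
    : foo;

foo
    : 'foo';
`
	ast, err := parser.Parse(strings.NewReader(src))
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	b := grammar.GrammarBuilder{AST: ast}
	// Build now compiles in one step; EnableReporting (formerly a
	// CompileOption) additionally yields a *spec.Report.
	cg, report, err := b.Build(grammar.EnableReporting())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(cg.Name, report != nil)
}

Callers that previously did gram, _ := b.Build() followed by Compile(gram, ...) collapse into the single Build call; tests inside the package keep the old two-step shape via the unexported build method, as the test diffs below show.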
diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go
index f6cb681..e3cf668 100644
--- a/grammar/grammar_test.go
+++ b/grammar/grammar_test.go
@@ -5,7 +5,7 @@ import (
"testing"
verr "github.com/nihei9/vartan/error"
- spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/spec/grammar/parser"
)
func TestGrammarBuilderOK(t *testing.T) {
@@ -243,9 +243,9 @@ baz
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 1 || fooAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
@@ -253,9 +253,9 @@ baz
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 1 || barAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
@@ -263,9 +263,9 @@ baz
var bazPrec int
var bazAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("baz")
- bazPrec = g.precAndAssoc.terminalPrecedence(s.num())
- bazAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("baz")
+ bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if bazPrec != precNil || bazAssoc != assocTypeNil {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
@@ -296,9 +296,9 @@ baz
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 1 || fooAssoc != assocTypeRight {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc)
@@ -306,9 +306,9 @@ baz
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 1 || barAssoc != assocTypeRight {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc)
@@ -316,9 +316,9 @@ baz
var bazPrec int
var bazAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("baz")
- bazPrec = g.precAndAssoc.terminalPrecedence(s.num())
- bazAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("baz")
+ bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if bazPrec != precNil || bazAssoc != assocTypeNil {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
@@ -349,9 +349,9 @@ baz
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 1 || fooAssoc != assocTypeNil {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc)
@@ -359,9 +359,9 @@ baz
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 1 || barAssoc != assocTypeNil {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc)
@@ -369,9 +369,9 @@ baz
var bazPrec int
var bazAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("baz")
- bazPrec = g.precAndAssoc.terminalPrecedence(s.num())
- bazAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("baz")
+ bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if bazPrec != precNil || bazAssoc != assocTypeNil {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
@@ -400,14 +400,14 @@ bar
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var sPrec int
var sAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
sPrec = g.precAndAssoc.productionPredence(ps[0].num)
sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -443,14 +443,14 @@ bar
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var sPrec int
var sAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
sPrec = g.precAndAssoc.productionPredence(ps[0].num)
sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -489,21 +489,21 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var aPrec int
var aAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("a")
+ s, _ := g.symbolTable.ToSymbol("a")
ps, _ := g.productionSet.findByLHS(s)
aPrec = g.precAndAssoc.productionPredence(ps[0].num)
aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -511,7 +511,7 @@ bar
var sPrec int
var sAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
sPrec = g.precAndAssoc.productionPredence(ps[0].num)
sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -567,7 +567,7 @@ bra
var alt4Prec int
var alt4Assoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -615,14 +615,14 @@ foo
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var aPrec int
var aAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("a")
+ s, _ := g.symbolTable.ToSymbol("a")
ps, _ := g.productionSet.findByLHS(s)
aPrec = g.precAndAssoc.productionPredence(ps[0].num)
aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -630,7 +630,7 @@ foo
var sPrec int
var sAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
sPrec = g.precAndAssoc.productionPredence(ps[0].num)
sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -668,14 +668,14 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var sPrec int
var sAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
sPrec = g.precAndAssoc.productionPredence(ps[0].num)
sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -711,21 +711,21 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var sPrec int
var sAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
sPrec = g.precAndAssoc.productionPredence(ps[0].num)
sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -766,9 +766,9 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 2 || fooAssoc != assocTypeRight {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc)
@@ -776,9 +776,9 @@ bar
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 2 || barAssoc != assocTypeRight {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
@@ -788,7 +788,7 @@ bar
var alt2Prec int
var alt2Assoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -828,9 +828,9 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 2 || fooAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc)
@@ -838,9 +838,9 @@ bar
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 2 || barAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc)
@@ -850,7 +850,7 @@ bar
var alt2Prec int
var alt2Assoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -891,9 +891,9 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 2 || fooAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc)
@@ -901,9 +901,9 @@ bar
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 3 || barAssoc != assocTypeRight {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc)
@@ -913,7 +913,7 @@ bar
var alt2Prec int
var alt2Assoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -952,16 +952,16 @@ bar
var fooPrec int
var fooAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("foo")
+ fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if fooPrec != 1 || fooAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
@@ -974,7 +974,7 @@ bar
var alt2Prec int
var alt2Assoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -1016,9 +1016,9 @@ bar
var barPrec int
var barAssoc assocType
{
- s, _ := g.symbolTable.toSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.num())
+ s, _ := g.symbolTable.ToSymbol("bar")
+ barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
+ barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
}
if barPrec != 1 || barAssoc != assocTypeLeft {
t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
@@ -1028,7 +1028,7 @@ bar
var alt2Prec int
var alt2Assoc assocType
{
- s, _ := g.symbolTable.toSymbol("s")
+ s, _ := g.symbolTable.ToSymbol("s")
ps, _ := g.productionSet.findByLHS(s)
alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
@@ -1052,7 +1052,7 @@ bar
for _, test := range tests {
t.Run(test.caption, func(t *testing.T) {
- ast, err := spec.Parse(strings.NewReader(test.specSrc))
+ ast, err := parser.Parse(strings.NewReader(test.specSrc))
if err != nil {
t.Fatal(err)
}
@@ -1060,7 +1060,7 @@ bar
b := GrammarBuilder{
AST: ast,
}
- g, err := b.Build()
+ g, err := b.build()
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -1075,7 +1075,7 @@ func TestGrammarBuilderSpecError(t *testing.T) {
type specErrTest struct {
caption string
specSrc string
- errs []*SemanticError
+ errs []error
}
spellingInconsistenciesTests := []*specErrTest{
@@ -1094,7 +1094,7 @@ a_1
foo
: 'foo';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among terminal symbols",
@@ -1110,7 +1110,7 @@ foo1
foo_1
: 'foo_1';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among non-terminal and terminal symbols",
@@ -1124,7 +1124,7 @@ a1
a_1
: 'a_1';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same",
@@ -1145,7 +1145,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same",
@@ -1167,7 +1167,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among labels the same alternative contains",
@@ -1181,7 +1181,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among labels the same production contains",
@@ -1198,7 +1198,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
{
caption: "a spelling inconsistency appears among labels different productions contain",
@@ -1217,7 +1217,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrSpellingInconsistency},
+ errs: []error{semErrSpellingInconsistency},
},
}
@@ -1237,7 +1237,7 @@ b
foo
: "foo";
`,
- errs: []*SemanticError{semErrUnusedProduction},
+ errs: []error{semErrUnusedProduction},
},
{
caption: "a terminal symbol `bar` is unused",
@@ -1253,7 +1253,7 @@ foo
bar
: "bar";
`,
- errs: []*SemanticError{semErrUnusedTerminal},
+ errs: []error{semErrUnusedTerminal},
},
{
caption: "a production `b` and terminal symbol `bar` is unused",
@@ -1272,7 +1272,7 @@ foo
bar
: "bar";
`,
- errs: []*SemanticError{
+ errs: []error{
semErrUnusedProduction,
semErrUnusedTerminal,
},
@@ -1289,7 +1289,7 @@ s #prec foo
foo
: 'foo';
`,
- errs: []*SemanticError{semErrInvalidProdDir},
+ errs: []error{semErrInvalidProdDir},
},
{
caption: "a lexical production cannot have alternative directives",
@@ -1303,7 +1303,7 @@ s
foo
: 'foo' #skip;
`,
- errs: []*SemanticError{semErrInvalidAltDir},
+ errs: []error{semErrInvalidAltDir},
},
{
caption: "a production directive must not be duplicated",
@@ -1317,7 +1317,7 @@ s
foo #skip #skip
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateDir},
+ errs: []error{semErrDuplicateDir},
},
{
caption: "an alternative directive must not be duplicated",
@@ -1333,7 +1333,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDuplicateDir},
+ errs: []error{semErrDuplicateDir},
},
{
caption: "a production must not have a duplicate alternative (non-empty alternatives)",
@@ -1348,7 +1348,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDuplicateProduction},
+ errs: []error{semErrDuplicateProduction},
},
{
caption: "a production must not have a duplicate alternative (non-empty and split alternatives)",
@@ -1371,7 +1371,7 @@ foo
bar
: "bar";
`,
- errs: []*SemanticError{semErrDuplicateProduction},
+ errs: []error{semErrDuplicateProduction},
},
{
caption: "a production must not have a duplicate alternative (empty alternatives)",
@@ -1390,7 +1390,7 @@ a
foo
: "foo";
`,
- errs: []*SemanticError{semErrDuplicateProduction},
+ errs: []error{semErrDuplicateProduction},
},
{
caption: "a production must not have a duplicate alternative (empty and split alternatives)",
@@ -1412,7 +1412,7 @@ a
foo
: "foo";
`,
- errs: []*SemanticError{semErrDuplicateProduction},
+ errs: []error{semErrDuplicateProduction},
},
{
caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates",
@@ -1428,7 +1428,7 @@ foo
s
: "a";
`,
- errs: []*SemanticError{semErrDuplicateName},
+ errs: []error{semErrDuplicateName},
},
{
caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates",
@@ -1450,7 +1450,7 @@ bar
a
: "a";
`,
- errs: []*SemanticError{semErrDuplicateName},
+ errs: []error{semErrDuplicateName},
},
{
caption: "an invalid top-level directive",
@@ -1466,7 +1466,7 @@ s
a
: 'a';
`,
- errs: []*SemanticError{semErrDirInvalidName},
+ errs: []error{semErrDirInvalidName},
},
{
caption: "a label must be unique in an alternative",
@@ -1482,7 +1482,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDuplicateLabel},
+ errs: []error{semErrDuplicateLabel},
},
{
caption: "a label cannot be the same name as terminal symbols",
@@ -1498,7 +1498,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDuplicateLabel},
+ errs: []error{semErrDuplicateLabel},
},
{
caption: "a label cannot be the same name as non-terminal symbols",
@@ -1518,7 +1518,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{
+ errs: []error{
semErrInvalidLabel,
},
},
@@ -1535,7 +1535,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrNoGrammarName},
+ errs: []error{semErrNoGrammarName},
},
{
caption: "the `#name` directive needs an ID parameter",
@@ -1549,7 +1549,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#name` directive cannot take a pattern parameter",
@@ -1563,7 +1563,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#name` directive cannot take a string parameter",
@@ -1577,7 +1577,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#name` directive takes just one parameter",
@@ -1591,7 +1591,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -1610,7 +1610,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take an ID parameter",
@@ -1626,7 +1626,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take an ordered symbol parameter",
@@ -1642,7 +1642,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take a pattern parameter",
@@ -1658,7 +1658,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take a string parameter",
@@ -1674,7 +1674,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive takes just one directive group parameter",
@@ -1690,7 +1690,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -1711,7 +1711,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot be applied to an error symbol",
@@ -1732,7 +1732,7 @@ foo
semi_colon
: ';';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot take an undefined symbol",
@@ -1750,7 +1750,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot take a non-terminal symbol",
@@ -1768,7 +1768,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot take a pattern parameter",
@@ -1786,7 +1786,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot take a string parameter",
@@ -1804,7 +1804,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot take a directive parameter",
@@ -1822,7 +1822,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#left` directive cannot be specified multiple times for a terminal symbol",
@@ -1840,7 +1840,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "the `#left` directive cannot be specified multiple times for an ordered symbol",
@@ -1858,7 +1858,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "a terminal symbol cannot have different precedence",
@@ -1877,7 +1877,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "an ordered symbol cannot have different precedence",
@@ -1896,7 +1896,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "a terminal symbol cannot have different associativity",
@@ -1915,7 +1915,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "an ordered symbol cannot have different associativity",
@@ -1934,7 +1934,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
}
@@ -1955,7 +1955,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot be applied to an error symbol",
@@ -1976,7 +1976,7 @@ foo
semi_colon
: ';';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot take an undefined symbol",
@@ -1994,7 +1994,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot take a non-terminal symbol",
@@ -2012,7 +2012,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot take a pattern parameter",
@@ -2030,7 +2030,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot take a string parameter",
@@ -2048,7 +2048,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot take a directive group parameter",
@@ -2066,7 +2066,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#right` directive cannot be specified multiple times for a terminal symbol",
@@ -2084,7 +2084,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "the `#right` directive cannot be specified multiple times for an ordered symbol",
@@ -2102,7 +2102,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "a terminal symbol cannot have different precedence",
@@ -2121,7 +2121,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "an ordered symbol cannot have different precedence",
@@ -2140,7 +2140,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "a terminal symbol cannot have different associativity",
@@ -2159,7 +2159,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "an ordered symbol cannot have different associativity",
@@ -2178,7 +2178,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
}
@@ -2199,7 +2199,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot be applied to an error symbol",
@@ -2220,7 +2220,7 @@ foo
semi_colon
: ';';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot take an undefined symbol",
@@ -2238,7 +2238,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot take a non-terminal symbol",
@@ -2256,7 +2256,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot take a pattern parameter",
@@ -2274,7 +2274,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot take a string parameter",
@@ -2292,7 +2292,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot take a directive parameter",
@@ -2310,7 +2310,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#assign` directive cannot be specified multiple times for a terminal symbol",
@@ -2328,7 +2328,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "the `#assign` directive cannot be specified multiple times for an ordered symbol",
@@ -2346,7 +2346,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "a terminal symbol cannot have different precedence",
@@ -2365,7 +2365,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "an ordered symbol cannot have different precedence",
@@ -2384,7 +2384,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "a terminal symbol cannot have different associativity",
@@ -2403,7 +2403,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
{
caption: "an ordered symbol cannot have different associativity",
@@ -2422,7 +2422,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateAssoc},
+ errs: []error{semErrDuplicateAssoc},
},
}
@@ -2441,7 +2441,7 @@ error
foo: 'foo';
`,
- errs: []*SemanticError{
+ errs: []error{
semErrErrSymIsReserved,
semErrDuplicateName,
},
@@ -2457,7 +2457,7 @@ s
error: 'error';
`,
- errs: []*SemanticError{semErrErrSymIsReserved},
+ errs: []error{semErrErrSymIsReserved},
},
{
caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive",
@@ -2473,7 +2473,7 @@ foo
error #skip
: 'error';
`,
- errs: []*SemanticError{semErrErrSymIsReserved},
+ errs: []error{semErrErrSymIsReserved},
},
}
@@ -2490,7 +2490,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#ast` directive cannot take an ordered symbol parameter",
@@ -2508,7 +2508,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#ast` directive cannot take a pattern parameter",
@@ -2522,7 +2522,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#ast` directive cannot take a string parameter",
@@ -2536,7 +2536,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#ast` directive cannot take a directive group parameter",
@@ -2550,7 +2550,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative",
@@ -2566,7 +2566,7 @@ foo
bar
: "bar";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive",
@@ -2583,7 +2583,7 @@ foo
bar
: "bar";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "a label in a different alternative cannot be a parameter of the `#ast` directive",
@@ -2600,7 +2600,7 @@ foo
bar
: "bar";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "a symbol can appear in the `#ast` directive only once",
@@ -2614,7 +2614,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateElem},
+ errs: []error{semErrDuplicateElem},
},
{
caption: "a label can appear in the `#ast` directive only once",
@@ -2628,7 +2628,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateElem},
+ errs: []error{semErrDuplicateElem},
},
{
caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label",
@@ -2642,7 +2642,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDuplicateElem},
+ errs: []error{semErrDuplicateElem},
},
{
caption: "symbol `foo` is ambiguous because it appears in an alternative twice",
@@ -2656,7 +2656,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrAmbiguousElem},
+ errs: []error{semErrAmbiguousElem},
},
{
caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label",
@@ -2670,7 +2670,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrAmbiguousElem},
+ errs: []error{semErrAmbiguousElem},
},
{
caption: "the expansion operator cannot be applied to a terminal symbol",
@@ -2684,7 +2684,7 @@ s
foo
: "foo";
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -2701,7 +2701,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot be applied to an error symbol",
@@ -2715,7 +2715,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take an undefined symbol",
@@ -2729,7 +2729,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take a non-terminal symbol",
@@ -2752,7 +2752,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take an undefined ordered symbol parameter",
@@ -2766,7 +2766,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrUndefinedOrdSym},
+ errs: []error{semErrUndefinedOrdSym},
},
{
caption: "the `#prec` directive cannot take a pattern parameter",
@@ -2780,7 +2780,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take a string parameter",
@@ -2794,7 +2794,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#prec` directive cannot take a directive parameter",
@@ -2808,7 +2808,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "a symbol the `#prec` directive takes must be given precedence explicitly",
@@ -2824,7 +2824,7 @@ foo
bar
: 'bar';
`,
- errs: []*SemanticError{semErrUndefinedPrec},
+ errs: []error{semErrUndefinedPrec},
},
}
@@ -2841,7 +2841,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#recover` directive cannot take an ordered symbol parameter",
@@ -2859,7 +2859,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#recover` directive cannot take a pattern parameter",
@@ -2873,7 +2873,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#recover` directive cannot take a string parameter",
@@ -2887,7 +2887,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#recover` directive cannot take a directive group parameter",
@@ -2901,7 +2901,7 @@ s
foo
: 'foo';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -2918,7 +2918,7 @@ s
fragment f
: 'fragment';
`,
- errs: []*SemanticError{semErrUndefinedSym},
+ errs: []error{semErrUndefinedSym},
},
{
caption: "fragments cannot be duplicated",
@@ -2936,7 +2936,7 @@ fragment f
fragment f
: 'fragment 2';
`,
- errs: []*SemanticError{semErrDuplicateFragment},
+ errs: []error{semErrDuplicateFragment},
},
}
@@ -2955,7 +2955,7 @@ foo #push mode_1
bar #mode
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#mode` directive cannot take an ordered symbol parameter",
@@ -2975,7 +2975,7 @@ foo
bar #mode $x
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#mode` directive cannot take a pattern parameter",
@@ -2991,7 +2991,7 @@ foo #push mode_1
bar #mode "mode_1"
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#mode` directive cannot take a string parameter",
@@ -3007,7 +3007,7 @@ foo #push mode_1
bar #mode 'mode_1'
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#mode` directive cannot take a directive group parameter",
@@ -3023,7 +3023,7 @@ foo #push mode_1
bar #mode ()
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -3042,7 +3042,7 @@ foo #push
bar #mode mode_1
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#push` directive takes just one ID parameter",
@@ -3058,7 +3058,7 @@ foo #push mode_1 mode_2
bar #mode mode_1
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#push` directive cannot take an ordered symbol parameter",
@@ -3078,7 +3078,7 @@ foo #push $x
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#push` directive cannot take a pattern parameter",
@@ -3094,7 +3094,7 @@ foo #push "mode_1"
bar #mode mode_1
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#push` directive cannot take a string parameter",
@@ -3110,7 +3110,7 @@ foo #push 'mode_1'
bar #mode mode_1
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#push` directive cannot take a directive group parameter",
@@ -3126,7 +3126,7 @@ foo #push ()
bar #mode mode_1
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -3147,7 +3147,7 @@ bar #mode mode_1
baz #pop mode_1
: 'baz';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#pop` directive cannot take an ordered symbol parameter",
@@ -3169,7 +3169,7 @@ bar #mode mode_1
baz #pop $x
: 'baz';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#pop` directive cannot take a pattern parameter",
@@ -3187,7 +3187,7 @@ bar #mode mode_1
baz #pop "mode_1"
: 'baz';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#pop` directive cannot take a string parameter",
@@ -3205,7 +3205,7 @@ bar #mode mode_1
baz #pop 'mode_1'
: 'baz';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#pop` directive cannot take a directive parameter",
@@ -3223,7 +3223,7 @@ bar #mode mode_1
baz #pop ()
: 'baz';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
}
@@ -3242,7 +3242,7 @@ foo #skip bar
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#skip` directive cannot take an ordered symbol parameter",
@@ -3262,7 +3262,7 @@ foo #skip $x
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#skip` directive cannot take a pattern parameter",
@@ -3278,7 +3278,7 @@ foo #skip "bar"
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#skip` directive cannot take a string parameter",
@@ -3294,7 +3294,7 @@ foo #skip 'bar'
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "the `#skip` directive cannot take a directive group parameter",
@@ -3310,7 +3310,7 @@ foo #skip ()
bar
: 'bar';
`,
- errs: []*SemanticError{semErrDirInvalidParam},
+ errs: []error{semErrDirInvalidParam},
},
{
caption: "a terminal symbol used in productions cannot have the skip directive",
@@ -3326,7 +3326,7 @@ foo #skip
bar
: 'bar';
`,
- errs: []*SemanticError{semErrTermCannotBeSkipped},
+ errs: []error{semErrTermCannotBeSkipped},
},
}
@@ -3349,7 +3349,7 @@ bar
tests = append(tests, skipDirTests...)
for _, test := range tests {
t.Run(test.caption, func(t *testing.T) {
- ast, err := spec.Parse(strings.NewReader(test.specSrc))
+ ast, err := parser.Parse(strings.NewReader(test.specSrc))
if err != nil {
t.Fatal(err)
}
@@ -3357,7 +3357,7 @@ bar
b := GrammarBuilder{
AST: ast,
}
- _, err = b.Build()
+ _, err = b.build()
if err == nil {
t.Fatal("an expected error didn't occur")
}
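The switch from []*SemanticError to []error in these tables lets the expected errors be handled through Go's standard error interface. As a minimal sketch of how such a slice can be matched, assuming an errors.Is-based comparison (the helper containsErr is hypothetical and not part of this diff):

package grammar

import "errors"

// containsErr reports whether any error in errs matches want according
// to errors.Is. Hypothetical helper; the tests' actual comparison logic
// is not shown in this diff.
func containsErr(errs []error, want error) bool {
	for _, e := range errs {
		if errors.Is(e, want) {
			return true
		}
	}
	return false
}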
diff --git a/grammar/item.go b/grammar/item.go
index 100d920..84c4911 100644
--- a/grammar/item.go
+++ b/grammar/item.go
@@ -6,6 +6,8 @@ import (
"fmt"
"sort"
"strconv"
+
+ "github.com/nihei9/vartan/grammar/symbol"
)
type lrItemID [32]byte
@@ -19,7 +21,7 @@ func (id lrItemID) num() uint32 {
}
type lookAhead struct {
- symbols map[symbol]struct{}
+ symbols map[symbol.Symbol]struct{}
// When propagation is true, an item propagates look-ahead symbols to other items.
propagation bool
@@ -38,7 +40,7 @@ type lrItem struct {
// 2 | T | E → E +・T
// 3 | Nil | E → E + T・
dot int
- dottedSymbol symbol
+ dottedSymbol symbol.Symbol
// When initial is true, the LHS of the production is the augmented start symbol and dot is 0.
// It looks like S' →・S.
@@ -74,13 +76,13 @@ func newLR0Item(prod *production, dot int) (*lrItem, error) {
id = sha256.Sum256(b)
}
- dottedSymbol := symbolNil
+ dottedSymbol := symbol.SymbolNil
if dot < prod.rhsLen {
dottedSymbol = prod.rhs[dot]
}
initial := false
- if prod.lhs.isStart() && dot == 0 {
+ if prod.lhs.IsStart() && dot == 0 {
initial = true
}
@@ -176,7 +178,7 @@ func (n stateNum) next() stateNum {
type lrState struct {
*kernel
num stateNum
- next map[symbol]kernelID
+ next map[symbol.Symbol]kernelID
reducible map[productionID]struct{}
// emptyProdItems stores items that have an empty production like `p → ε` and are reducible.
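For orientation, dot and dottedSymbol encode the usual LR-item notion: the dot index selects the next RHS symbol, and Nil stands in once the dot reaches the end of the production. A sketch of that rule with the types above (dottedSymbolOf is an illustrative name, not a function in this diff):

// dottedSymbolOf returns the RHS symbol just after the dot, or Nil when
// the item is reducible (the dot sits at the end).
func dottedSymbolOf(rhs []symbol.Symbol, dot int) symbol.Symbol {
	if dot < len(rhs) {
		return rhs[dot]
	}
	return symbol.SymbolNil
}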
diff --git a/grammar/lalr1.go b/grammar/lalr1.go
index f1b8149..1667d84 100644
--- a/grammar/lalr1.go
+++ b/grammar/lalr1.go
@@ -1,6 +1,10 @@
package grammar
-import "fmt"
+import (
+ "fmt"
+
+ "github.com/nihei9/vartan/grammar/symbol"
+)
type stateAndLRItem struct {
kernelID kernelID
@@ -19,8 +23,8 @@ type lalr1Automaton struct {
func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet) (*lalr1Automaton, error) {
// Set the look-ahead symbol <EOF> to the initial item: [S' → ・S, $]
iniState := lr0.states[lr0.initialState]
- iniState.items[0].lookAhead.symbols = map[symbol]struct{}{
- symbolEOF: {},
+ iniState.items[0].lookAhead.symbols = map[symbol.Symbol]struct{}{
+ symbol.SymbolEOF: {},
}
var props []*propagation
@@ -55,7 +59,7 @@ func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet)
return nil, fmt.Errorf("reducible item not found: %v", item.id)
}
if reducibleItem.lookAhead.symbols == nil {
- reducibleItem.lookAhead.symbols = map[symbol]struct{}{}
+ reducibleItem.lookAhead.symbols = map[symbol.Symbol]struct{}{}
}
for a := range item.lookAhead.symbols {
reducibleItem.lookAhead.symbols[a] = struct{}{}
@@ -104,7 +108,7 @@ func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet)
}
if nextItem.lookAhead.symbols == nil {
- nextItem.lookAhead.symbols = map[symbol]struct{}{}
+ nextItem.lookAhead.symbols = map[symbol.Symbol]struct{}{}
}
for a := range item.lookAhead.symbols {
@@ -138,7 +142,7 @@ func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet)
func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]*lrItem, error) {
items := []*lrItem{}
- knownItems := map[lrItemID]map[symbol]struct{}{}
+ knownItems := map[lrItemID]map[symbol.Symbol]struct{}{}
knownItemsProp := map[lrItemID]struct{}{}
uncheckedItems := []*lrItem{}
items = append(items, srcItem)
@@ -146,7 +150,7 @@ func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]
for len(uncheckedItems) > 0 {
nextUncheckedItems := []*lrItem{}
for _, item := range uncheckedItems {
- if item.dottedSymbol.isTerminal() {
+ if item.dottedSymbol.IsTerminal() {
continue
}
@@ -155,7 +159,7 @@ func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]
return nil, fmt.Errorf("production not found: %v", item.prod)
}
- var fstSyms []symbol
+ var fstSyms []symbol.Symbol
var isFstNullable bool
{
fst, err := first.find(p, item.dot+1)
@@ -163,7 +167,7 @@ func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]
return nil, err
}
- fstSyms = make([]symbol, len(fst.symbols))
+ fstSyms = make([]symbol.Symbol, len(fst.symbols))
i := 0
for s := range fst.symbols {
fstSyms[i] = s
@@ -176,7 +180,7 @@ func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]
ps, _ := prods.findByLHS(item.dottedSymbol)
for _, prod := range ps {
- var lookAhead []symbol
+ var lookAhead []symbol.Symbol
{
var lookAheadCount int
if isFstNullable {
@@ -185,7 +189,7 @@ func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]
lookAheadCount = len(fstSyms)
}
- lookAhead = make([]symbol, lookAheadCount)
+ lookAhead = make([]symbol.Symbol, lookAheadCount)
i := 0
for _, s := range fstSyms {
lookAhead[i] = s
@@ -210,13 +214,13 @@ func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]
}
}
- newItem.lookAhead.symbols = map[symbol]struct{}{
+ newItem.lookAhead.symbols = map[symbol.Symbol]struct{}{
a: {},
}
items = append(items, newItem)
if knownItems[newItem.id] == nil {
- knownItems[newItem.id] = map[symbol]struct{}{}
+ knownItems[newItem.id] = map[symbol.Symbol]struct{}{}
}
knownItems[newItem.id][a] = struct{}{}
nextUncheckedItems = append(nextUncheckedItems, newItem)
@@ -297,7 +301,7 @@ func propagateLookAhead(lr0 *lr0Automaton, props []*propagation) error {
}
if destItem.lookAhead.symbols == nil {
- destItem.lookAhead.symbols = map[symbol]struct{}{}
+ destItem.lookAhead.symbols = map[symbol.Symbol]struct{}{}
}
destItem.lookAhead.symbols[a] = struct{}{}
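genLALR1Closure follows the standard LALR(1) rule: for an item [A → α・Bβ, a] and each production B → γ, it adds [B → ・γ, x] for every x in FIRST(β), plus a itself when β is nullable. A sketch of just the look-ahead computation, using string stand-ins for symbol.Symbol (lookAheadsFor is illustrative, not part of this diff):

// lookAheadsFor computes the look-aheads of a closure item from
// FIRST(β) and the source item's own look-ahead set.
func lookAheadsFor(firstOfBeta []string, betaNullable bool, itemLA []string) []string {
	la := append([]string{}, firstOfBeta...)
	if betaNullable {
		// β can derive ε, so the source item's look-aheads pass through.
		la = append(la, itemLA...)
	}
	return la
}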
diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go
index d6d0371..c57dc5c 100644
--- a/grammar/lalr1_test.go
+++ b/grammar/lalr1_test.go
@@ -4,7 +4,8 @@ import (
"strings"
"testing"
- spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/grammar/symbol"
+ "github.com/nihei9/vartan/spec/grammar/parser"
)
func TestGenLALR1Automaton(t *testing.T) {
@@ -23,15 +24,14 @@ id: "[A-Za-z0-9_]+";
var gram *Grammar
var automaton *lalr1Automaton
{
- ast, err := spec.Parse(strings.NewReader(src))
+ ast, err := parser.Parse(strings.NewReader(src))
if err != nil {
t.Fatal(err)
}
b := GrammarBuilder{
AST: ast,
}
-
- gram, err = b.Build()
+ gram, err = b.build()
if err != nil {
t.Fatal(err)
}
@@ -66,42 +66,42 @@ id: "[A-Za-z0-9_]+";
expectedKernels := map[int][]*lrItem{
0: {
- withLookAhead(genLR0Item("s'", 0, "s"), symbolEOF),
+ withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
},
1: {
- withLookAhead(genLR0Item("s'", 1, "s"), symbolEOF),
+ withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
},
2: {
- withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbolEOF),
- withLookAhead(genLR0Item("r", 1, "l"), symbolEOF),
+ withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
},
3: {
- withLookAhead(genLR0Item("s", 1, "r"), symbolEOF),
+ withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
},
4: {
- withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
},
5: {
- withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
},
6: {
- withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbolEOF),
+ withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
},
7: {
- withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
},
8: {
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
},
9: {
- withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbolEOF),
+ withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
},
}
expectedStates := []*expectedLRState{
{
kernelItems: expectedKernels[0],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("s"): expectedKernels[1],
genSym("l"): expectedKernels[2],
genSym("r"): expectedKernels[3],
@@ -112,14 +112,14 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[1],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("s'", "s"),
},
},
{
kernelItems: expectedKernels[2],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("eq"): expectedKernels[6],
},
reducibleProds: []*production{
@@ -128,14 +128,14 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[3],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("s", "r"),
},
},
{
kernelItems: expectedKernels[4],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("r"): expectedKernels[7],
genSym("l"): expectedKernels[8],
genSym("ref"): expectedKernels[4],
@@ -145,14 +145,14 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[5],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("l", "id"),
},
},
{
kernelItems: expectedKernels[6],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("r"): expectedKernels[9],
genSym("l"): expectedKernels[8],
genSym("ref"): expectedKernels[4],
@@ -162,21 +162,21 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[7],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("l", "ref", "r"),
},
},
{
kernelItems: expectedKernels[8],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("r", "l"),
},
},
{
kernelItems: expectedKernels[9],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("s", "l", "eq", "r"),
},
diff --git a/grammar/lexical/compiler.go b/grammar/lexical/compiler.go
new file mode 100644
index 0000000..61aa3f2
--- /dev/null
+++ b/grammar/lexical/compiler.go
@@ -0,0 +1,413 @@
+package lexical
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/nihei9/vartan/compressor"
+ "github.com/nihei9/vartan/grammar/lexical/dfa"
+ psr "github.com/nihei9/vartan/grammar/lexical/parser"
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+type CompileError struct {
+ Kind spec.LexKindName
+ Fragment bool
+ Cause error
+ Detail string
+}
+
+func Compile(lexspec *LexSpec, compLv int) (*spec.LexicalSpec, error, []*CompileError) {
+ err := lexspec.Validate()
+ if err != nil {
+ return nil, fmt.Errorf("invalid lexical specification:\n%w", err), nil
+ }
+
+ modeEntries, modeNames, modeName2ID, fragments := groupEntriesByLexMode(lexspec.Entries)
+
+ modeSpecs := []*spec.CompiledLexModeSpec{
+ nil,
+ }
+ for i, es := range modeEntries[1:] {
+ modeName := modeNames[i+1]
+ modeSpec, err, cerrs := compile(es, modeName2ID, fragments, compLv)
+ if err != nil {
+ return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err), cerrs
+ }
+ modeSpecs = append(modeSpecs, modeSpec)
+ }
+
+ var kindNames []spec.LexKindName
+ var name2ID map[spec.LexKindName]spec.LexKindID
+ {
+ name2ID = map[spec.LexKindName]spec.LexKindID{}
+ id := spec.LexKindIDMin
+ for _, modeSpec := range modeSpecs[1:] {
+ for _, name := range modeSpec.KindNames[1:] {
+ if _, ok := name2ID[name]; ok {
+ continue
+ }
+ name2ID[name] = id
+ id++
+ }
+ }
+
+ kindNames = make([]spec.LexKindName, len(name2ID)+1)
+ for name, id := range name2ID {
+ kindNames[id] = name
+ }
+ }
+
+ var kindIDs [][]spec.LexKindID
+ {
+ kindIDs = make([][]spec.LexKindID, len(modeSpecs))
+ for i, modeSpec := range modeSpecs[1:] {
+ ids := make([]spec.LexKindID, len(modeSpec.KindNames))
+ for kindID, name := range modeSpec.KindNames {
+ if kindID == 0 {
+ continue
+ }
+ ids[kindID] = name2ID[name]
+ }
+ kindIDs[i+1] = ids
+ }
+ }
+
+ return &spec.LexicalSpec{
+ InitialModeID: spec.LexModeIDDefault,
+ ModeNames: modeNames,
+ KindNames: kindNames,
+ KindIDs: kindIDs,
+ CompressionLevel: compLv,
+ Specs: modeSpecs,
+ }, nil, nil
+}
+
+func groupEntriesByLexMode(entries []*LexEntry) ([][]*LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[spec.LexKindName]*LexEntry) {
+ modeNames := []spec.LexModeName{
+ spec.LexModeNameNil,
+ spec.LexModeNameDefault,
+ }
+ modeName2ID := map[spec.LexModeName]spec.LexModeID{
+ spec.LexModeNameNil: spec.LexModeIDNil,
+ spec.LexModeNameDefault: spec.LexModeIDDefault,
+ }
+ lastModeID := spec.LexModeIDDefault
+ modeEntries := [][]*LexEntry{
+ nil,
+ {},
+ }
+ fragments := map[spec.LexKindName]*LexEntry{}
+ for _, e := range entries {
+ if e.Fragment {
+ fragments[e.Kind] = e
+ continue
+ }
+ ms := e.Modes
+ if len(ms) == 0 {
+ ms = []spec.LexModeName{
+ spec.LexModeNameDefault,
+ }
+ }
+ for _, modeName := range ms {
+ modeID, ok := modeName2ID[modeName]
+ if !ok {
+ modeID = lastModeID + 1
+ lastModeID = modeID
+ modeName2ID[modeName] = modeID
+ modeNames = append(modeNames, modeName)
+ modeEntries = append(modeEntries, []*LexEntry{})
+ }
+ modeEntries[modeID] = append(modeEntries[modeID], e)
+ }
+ }
+ return modeEntries, modeNames, modeName2ID, fragments
+}
+
+func compile(
+ entries []*LexEntry,
+ modeName2ID map[spec.LexModeName]spec.LexModeID,
+ fragments map[spec.LexKindName]*LexEntry,
+ compLv int,
+) (*spec.CompiledLexModeSpec, error, []*CompileError) {
+ var kindNames []spec.LexKindName
+ kindIDToName := map[spec.LexModeKindID]spec.LexKindName{}
+ var patterns map[spec.LexModeKindID][]byte
+ {
+ kindNames = append(kindNames, spec.LexKindNameNil)
+ patterns = map[spec.LexModeKindID][]byte{}
+ for i, e := range entries {
+ kindID := spec.LexModeKindID(i + 1)
+
+ kindNames = append(kindNames, e.Kind)
+ kindIDToName[kindID] = e.Kind
+ patterns[kindID] = []byte(e.Pattern)
+ }
+ }
+
+ push := []spec.LexModeID{
+ spec.LexModeIDNil,
+ }
+ pop := []int{
+ 0,
+ }
+ for _, e := range entries {
+ pushV := spec.LexModeIDNil
+ if e.Push != "" {
+ pushV = modeName2ID[e.Push]
+ }
+ push = append(push, pushV)
+ popV := 0
+ if e.Pop {
+ popV = 1
+ }
+ pop = append(pop, popV)
+ }
+
+ fragmentPatterns := map[spec.LexKindName][]byte{}
+ for k, e := range fragments {
+ fragmentPatterns[k] = []byte(e.Pattern)
+ }
+
+ fragmentCPTrees := make(map[spec.LexKindName]psr.CPTree, len(fragmentPatterns))
+ {
+ var cerrs []*CompileError
+ for kind, pat := range fragmentPatterns {
+ p := psr.NewParser(kind, bytes.NewReader(pat))
+ t, err := p.Parse()
+ if err != nil {
+ if err == psr.ParseErr {
+ detail, cause := p.Error()
+ cerrs = append(cerrs, &CompileError{
+ Kind: kind,
+ Fragment: true,
+ Cause: cause,
+ Detail: detail,
+ })
+ } else {
+ cerrs = append(cerrs, &CompileError{
+ Kind: kind,
+ Fragment: true,
+ Cause: err,
+ })
+ }
+ continue
+ }
+ fragmentCPTrees[kind] = t
+ }
+ if len(cerrs) > 0 {
+ return nil, fmt.Errorf("compile error"), cerrs
+ }
+
+ err := psr.CompleteFragments(fragmentCPTrees)
+ if err != nil {
+ if err == psr.ParseErr {
+ for _, frag := range fragmentCPTrees {
+ kind, frags, err := frag.Describe()
+ if err != nil {
+ return nil, err, nil
+ }
+
+ cerrs = append(cerrs, &CompileError{
+ Kind: kind,
+ Fragment: true,
+ Cause: fmt.Errorf("fragment contains undefined fragments or cycles"),
+ Detail: fmt.Sprintf("%v", frags),
+ })
+ }
+
+ return nil, fmt.Errorf("compile error"), cerrs
+ }
+
+ return nil, err, nil
+ }
+ }
+
+ cpTrees := map[spec.LexModeKindID]psr.CPTree{}
+ {
+ pats := make([]*psr.PatternEntry, len(patterns)+1)
+ pats[spec.LexModeKindIDNil] = &psr.PatternEntry{
+ ID: spec.LexModeKindIDNil,
+ }
+ for id, pattern := range patterns {
+ pats[id] = &psr.PatternEntry{
+ ID: id,
+ Pattern: pattern,
+ }
+ }
+
+ var cerrs []*CompileError
+ for _, pat := range pats {
+ if pat.ID == spec.LexModeKindIDNil {
+ continue
+ }
+
+ p := psr.NewParser(kindIDToName[pat.ID], bytes.NewReader(pat.Pattern))
+ t, err := p.Parse()
+ if err != nil {
+ if err == psr.ParseErr {
+ detail, cause := p.Error()
+ cerrs = append(cerrs, &CompileError{
+ Kind: kindIDToName[pat.ID],
+ Fragment: false,
+ Cause: cause,
+ Detail: detail,
+ })
+ } else {
+ cerrs = append(cerrs, &CompileError{
+ Kind: kindIDToName[pat.ID],
+ Fragment: false,
+ Cause: err,
+ })
+ }
+ continue
+ }
+
+ complete, err := psr.ApplyFragments(t, fragmentCPTrees)
+ if err != nil {
+ return nil, err, nil
+ }
+ if !complete {
+ _, frags, err := t.Describe()
+ if err != nil {
+ return nil, err, nil
+ }
+
+ cerrs = append(cerrs, &CompileError{
+ Kind: kindIDToName[pat.ID],
+ Fragment: false,
+ Cause: fmt.Errorf("pattern contains undefined fragments"),
+ Detail: fmt.Sprintf("%v", frags),
+ })
+ continue
+ }
+
+ cpTrees[pat.ID] = t
+ }
+ if len(cerrs) > 0 {
+ return nil, fmt.Errorf("compile error"), cerrs
+ }
+ }
+
+ var tranTab *spec.TransitionTable
+ {
+ root, symTab, err := dfa.ConvertCPTreeToByteTree(cpTrees)
+ if err != nil {
+ return nil, err, nil
+ }
+ d := dfa.GenDFA(root, symTab)
+ tranTab, err = dfa.GenTransitionTable(d)
+ if err != nil {
+ return nil, err, nil
+ }
+ }
+
+ var err error
+ switch compLv {
+ case 2:
+ tranTab, err = compressTransitionTableLv2(tranTab)
+ if err != nil {
+ return nil, err, nil
+ }
+ case 1:
+ tranTab, err = compressTransitionTableLv1(tranTab)
+ if err != nil {
+ return nil, err, nil
+ }
+ }
+
+ return &spec.CompiledLexModeSpec{
+ KindNames: kindNames,
+ Push: push,
+ Pop: pop,
+ DFA: tranTab,
+ }, nil, nil
+}
+
+const (
+ CompressionLevelMin = 0
+ CompressionLevelMax = 2
+)
+
+func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
+ ueTab := compressor.NewUniqueEntriesTable()
+ {
+ orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
+ if err != nil {
+ return nil, err
+ }
+ err = ueTab.Compress(orig)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ rdTab := compressor.NewRowDisplacementTable(0)
+ {
+ orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount)
+ if err != nil {
+ return nil, err
+ }
+ err = rdTab.Compress(orig)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ tranTab.Transition = &spec.UniqueEntriesTable{
+ UniqueEntries: &spec.RowDisplacementTable{
+ OriginalRowCount: rdTab.OriginalRowCount,
+ OriginalColCount: rdTab.OriginalColCount,
+ EmptyValue: spec.StateIDNil,
+ Entries: convertIntSliceToStateIDSlice(rdTab.Entries),
+ Bounds: rdTab.Bounds,
+ RowDisplacement: rdTab.RowDisplacement,
+ },
+ RowNums: ueTab.RowNums,
+ OriginalRowCount: ueTab.OriginalRowCount,
+ OriginalColCount: ueTab.OriginalColCount,
+ }
+ tranTab.UncompressedTransition = nil
+
+ return tranTab, nil
+}
+
+func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
+ ueTab := compressor.NewUniqueEntriesTable()
+ {
+ orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
+ if err != nil {
+ return nil, err
+ }
+ err = ueTab.Compress(orig)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ tranTab.Transition = &spec.UniqueEntriesTable{
+ UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries),
+ RowNums: ueTab.RowNums,
+ OriginalRowCount: ueTab.OriginalRowCount,
+ OriginalColCount: ueTab.OriginalColCount,
+ }
+ tranTab.UncompressedTransition = nil
+
+ return tranTab, nil
+}
+
+func convertStateIDSliceToIntSlice(s []spec.StateID) []int {
+ is := make([]int, len(s))
+ for i, v := range s {
+ is[i] = v.Int()
+ }
+ return is
+}
+
+func convertIntSliceToStateIDSlice(s []int) []spec.StateID {
+ ss := make([]spec.StateID, len(s))
+ for i, v := range s {
+ ss[i] = spec.StateID(v)
+ }
+ return ss
+}
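compressTransitionTableLv1 deduplicates identical rows (unique entries), and Lv2 additionally packs those unique rows into a single array by row displacement. A sketch of the corresponding look-up, with plain ints standing in for the spec types; the parameter names mirror the UniqueEntriesTable and RowDisplacementTable fields, but nextState itself is illustrative:

// nextState reads a level-2 compressed table: map the state to its
// deduplicated row, then probe the displaced slot for (row, col).
func nextState(rowNums, disp, bounds, entries []int, empty, state, col int) int {
	row := rowNums[state]
	pos := disp[row] + col
	if bounds[pos] != row { // the slot is owned by another row
		return empty
	}
	return entries[pos]
}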
diff --git a/grammar/lexical/compiler_test.go b/grammar/lexical/compiler_test.go
new file mode 100644
index 0000000..f2ef0f2
--- /dev/null
+++ b/grammar/lexical/compiler_test.go
@@ -0,0 +1,338 @@
+package lexical
+
+import (
+ "encoding/json"
+ "fmt"
+ "testing"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+func TestLexSpec_Validate(t *testing.T) {
+ // We expect a spelling inconsistency error to occur.
+ lspec := &LexSpec{
+ Entries: []*LexEntry{
+ {
+ Modes: []spec.LexModeName{
+ // 'Default' is a spelling inconsistency because 'default' is predefined.
+ "Default",
+ },
+ Kind: "foo",
+ Pattern: "foo",
+ },
+ },
+ }
+ err := lspec.Validate()
+ if err == nil {
+ t.Fatalf("an expected error didn't occur")
+ }
+}
+
+func TestSnakeCaseToUpperCamelCase(t *testing.T) {
+ tests := []struct {
+ snake string
+ camel string
+ }{
+ {
+ snake: "foo",
+ camel: "Foo",
+ },
+ {
+ snake: "foo_bar",
+ camel: "FooBar",
+ },
+ {
+ snake: "foo_bar_baz",
+ camel: "FooBarBaz",
+ },
+ {
+ snake: "Foo",
+ camel: "Foo",
+ },
+ {
+ snake: "fooBar",
+ camel: "FooBar",
+ },
+ {
+ snake: "FOO",
+ camel: "FOO",
+ },
+ {
+ snake: "FOO_BAR",
+ camel: "FOOBAR",
+ },
+ {
+ snake: "_foo_bar_",
+ camel: "FooBar",
+ },
+ {
+ snake: "___foo___bar___",
+ camel: "FooBar",
+ },
+ }
+ for _, tt := range tests {
+ c := SnakeCaseToUpperCamelCase(tt.snake)
+ if c != tt.camel {
+ t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
+ }
+ }
+}
+
+func TestFindSpellingInconsistencies(t *testing.T) {
+ tests := []struct {
+ ids []string
+ duplicated [][]string
+ }{
+ {
+ ids: []string{"foo", "foo"},
+ duplicated: nil,
+ },
+ {
+ ids: []string{"foo", "Foo"},
+ duplicated: [][]string{{"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo", "foo", "Foo"},
+ duplicated: [][]string{{"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo_bar_baz", "FooBarBaz"},
+ duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}},
+ },
+ {
+ ids: []string{"foo", "Foo", "bar", "Bar"},
+ duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"},
+ duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ duplicated := FindSpellingInconsistencies(tt.ids)
+ if len(duplicated) != len(tt.duplicated) {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated)
+ }
+ for i, dupIDs := range duplicated {
+ if len(dupIDs) != len(tt.duplicated[i]) {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
+ }
+ for j, id := range dupIDs {
+ if id != tt.duplicated[i][j] {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
+ }
+ }
+ }
+ })
+ }
+}
+
+func TestCompile(t *testing.T) {
+ tests := []struct {
+ Caption string
+ Spec string
+ Err bool
+ }{
+ {
+ Caption: "allow duplicate names between fragments and non-fragments",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a2z",
+ "pattern": "\\f{a2z}"
+ },
+ {
+ "fragment": true,
+ "kind": "a2z",
+ "pattern": "[a-z]"
+ }
+ ]
+}
+`,
+ },
+ {
+ Caption: "don't allow duplicate names in non-fragments",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a2z",
+ "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
+ },
+ {
+ "kind": "a2z",
+ "pattern": "[a-z]"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow duplicate names in fragments",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a2z",
+ "pattern": "\\f{a2z}"
+ },
+ {
+ "fragment": true,
+ "kind": "a2z",
+ "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
+ },
+ {
+ "fragment": true,
+ "kind": "a2z",
+ "pattern": "[a-z]"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow kind names in the same mode to contain spelling inconsistencies",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow kind names across modes to contain spelling inconsistencies",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "modes": ["default"],
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "modes": ["other_mode"],
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "don't allow mode names to contain spelling inconsistencies",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "modes": ["foo_1"],
+ "kind": "a",
+ "pattern": "a"
+ },
+ {
+ "modes": ["foo1"],
+ "kind": "b",
+ "pattern": "b"
+ }
+ ]
+}
+`,
+ Err: true,
+ },
+ {
+ Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "kind": "a",
+ "pattern": "a"
+ },
+ {
+ "fragment": true,
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "fragment": true,
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ },
+ {
+ Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files",
+ Spec: `
+{
+ "name": "test",
+ "entries": [
+ {
+ "modes": ["default"],
+ "kind": "a",
+ "pattern": "a"
+ },
+ {
+ "modes": ["default"],
+ "fragment": true,
+ "kind": "foo_1",
+ "pattern": "foo_1"
+ },
+ {
+ "modes": ["other_mode"],
+ "fragment": true,
+ "kind": "foo1",
+ "pattern": "foo1"
+ }
+ ]
+}
+`,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) {
+ lspec := &LexSpec{}
+ err := json.Unmarshal([]byte(tt.Spec), lspec)
+ if err != nil {
+ t.Fatalf("%v", err)
+ }
+ clspec, err, _ := Compile(lspec, CompressionLevelMin)
+ if tt.Err {
+ if err == nil {
+ t.Fatalf("expected an error")
+ }
+ if clspec != nil {
+ t.Fatalf("Compile function mustn't return a compiled specification")
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ if clspec == nil {
+ t.Fatalf("Compile function must return a compiled specification")
+ }
+ }
+ })
+ }
+}
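The spelling-inconsistency cases above rest on one idea: identifiers are normalized to UpperCamelCase, and distinct spellings that collide after normalization are reported, while exact duplicates are not. A sketch of that grouping, assuming it sits in the lexical package next to SnakeCaseToUpperCamelCase and imports sort; the real FindSpellingInconsistencies may differ in detail:

// inconsistencies groups identifiers by their UpperCamelCase normal form
// and reports groups containing more than one distinct spelling.
func inconsistencies(ids []string) [][]string {
	groups := map[string]map[string]struct{}{}
	for _, id := range ids {
		norm := SnakeCaseToUpperCamelCase(id)
		if groups[norm] == nil {
			groups[norm] = map[string]struct{}{}
		}
		groups[norm][id] = struct{}{}
	}
	var dup [][]string
	for _, g := range groups {
		if len(g) < 2 { // one spelling, however often repeated, is consistent
			continue
		}
		ss := make([]string, 0, len(g))
		for s := range g {
			ss = append(ss, s)
		}
		sort.Strings(ss) // the tests expect each group in sorted order
		dup = append(dup, ss)
	}
	// A full implementation would also order dup itself deterministically.
	return dup
}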
diff --git a/grammar/lexical/dfa/dfa.go b/grammar/lexical/dfa/dfa.go
new file mode 100644
index 0000000..850264a
--- /dev/null
+++ b/grammar/lexical/dfa/dfa.go
@@ -0,0 +1,173 @@
+package dfa
+
+import (
+ "sort"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+type symbolTable struct {
+ symPos2Byte map[symbolPosition]byteRange
+ endPos2ID map[symbolPosition]spec.LexModeKindID
+}
+
+func genSymbolTable(root byteTree) *symbolTable {
+ symTab := &symbolTable{
+ symPos2Byte: map[symbolPosition]byteRange{},
+ endPos2ID: map[symbolPosition]spec.LexModeKindID{},
+ }
+ return genSymTab(symTab, root)
+}
+
+func genSymTab(symTab *symbolTable, node byteTree) *symbolTable {
+ if node == nil {
+ return symTab
+ }
+
+ switch n := node.(type) {
+ case *symbolNode:
+ symTab.symPos2Byte[n.pos] = byteRange{
+ from: n.from,
+ to: n.to,
+ }
+ case *endMarkerNode:
+ symTab.endPos2ID[n.pos] = n.id
+ default:
+ left, right := node.children()
+ genSymTab(symTab, left)
+ genSymTab(symTab, right)
+ }
+ return symTab
+}
+
+type DFA struct {
+ States []string
+ InitialState string
+ AcceptingStatesTable map[string]spec.LexModeKindID
+ TransitionTable map[string][256]string
+}
+
+func GenDFA(root byteTree, symTab *symbolTable) *DFA {
+ initialState := root.first()
+ initialStateHash := initialState.hash()
+ stateMap := map[string]*symbolPositionSet{
+ initialStateHash: initialState,
+ }
+ tranTab := map[string][256]string{}
+ {
+ follow := genFollowTable(root)
+ unmarkedStates := map[string]*symbolPositionSet{
+ initialStateHash: initialState,
+ }
+ for len(unmarkedStates) > 0 {
+ nextUnmarkedStates := map[string]*symbolPositionSet{}
+ for hash, state := range unmarkedStates {
+ tranTabOfState := [256]*symbolPositionSet{}
+ for _, pos := range state.set() {
+ if pos.isEndMark() {
+ continue
+ }
+ valRange := symTab.symPos2Byte[pos]
+ for symVal := valRange.from; symVal <= valRange.to; symVal++ {
+ if tranTabOfState[symVal] == nil {
+ tranTabOfState[symVal] = newSymbolPositionSet()
+ }
+ tranTabOfState[symVal].merge(follow[pos])
+ }
+ }
+ for _, t := range tranTabOfState {
+ if t == nil {
+ continue
+ }
+ h := t.hash()
+ if _, ok := stateMap[h]; ok {
+ continue
+ }
+ stateMap[h] = t
+ nextUnmarkedStates[h] = t
+ }
+ tabOfState := [256]string{}
+ for v, t := range tranTabOfState {
+ if t == nil {
+ continue
+ }
+ tabOfState[v] = t.hash()
+ }
+ tranTab[hash] = tabOfState
+ }
+ unmarkedStates = nextUnmarkedStates
+ }
+ }
+
+ accTab := map[string]spec.LexModeKindID{}
+ {
+ for h, s := range stateMap {
+ for _, pos := range s.set() {
+ if !pos.isEndMark() {
+ continue
+ }
+ priorID, ok := accTab[h]
+ if !ok {
+ accTab[h] = symTab.endPos2ID[pos]
+ } else {
+ id := symTab.endPos2ID[pos]
+ if id < priorID {
+ accTab[h] = id
+ }
+ }
+ }
+ }
+ }
+
+ var states []string
+ {
+ for s := range stateMap {
+ states = append(states, s)
+ }
+ sort.Slice(states, func(i, j int) bool {
+ return states[i] < states[j]
+ })
+ }
+
+ return &DFA{
+ States: states,
+ InitialState: initialStateHash,
+ AcceptingStatesTable: accTab,
+ TransitionTable: tranTab,
+ }
+}
+
+func GenTransitionTable(dfa *DFA) (*spec.TransitionTable, error) {
+ stateHash2ID := map[string]spec.StateID{}
+ for i, s := range dfa.States {
+ // Since 0 represents an invalid value in a transition table,
+ // assign a number greater than or equal to 1 to states.
+ stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int())
+ }
+
+ acc := make([]spec.LexModeKindID, len(dfa.States)+1)
+ for _, s := range dfa.States {
+ id, ok := dfa.AcceptingStatesTable[s]
+ if !ok {
+ continue
+ }
+ acc[stateHash2ID[s]] = id
+ }
+
+ rowCount := len(dfa.States) + 1
+ colCount := 256
+ tran := make([]spec.StateID, rowCount*colCount)
+ for s, tab := range dfa.TransitionTable {
+ for v, to := range tab {
+ tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to]
+ }
+ }
+
+ return &spec.TransitionTable{
+ InitialStateID: stateHash2ID[dfa.InitialState],
+ AcceptingStates: acc,
+ UncompressedTransition: tran,
+ RowCount: rowCount,
+ ColCount: colCount,
+ }, nil
+}
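GenTransitionTable flattens the map-based DFA into a row-major array: the move for (state s, input byte v) lives at index s*256+v, and state 0 stays reserved as the invalid state. A one-line look-up sketch against that layout (lookup is illustrative; spec.StateID and its Int method come from the imports above):

// lookup reads the uncompressed, row-major transition array.
// A zero result means there is no transition.
func lookup(tran []spec.StateID, s spec.StateID, v byte) spec.StateID {
	return tran[s.Int()*256+int(v)]
}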
diff --git a/grammar/lexical/dfa/dfa_test.go b/grammar/lexical/dfa/dfa_test.go
new file mode 100644
index 0000000..ae71875
--- /dev/null
+++ b/grammar/lexical/dfa/dfa_test.go
@@ -0,0 +1,121 @@
+package dfa
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/grammar/lexical/parser"
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+func TestGenDFA(t *testing.T) {
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ dfa := GenDFA(bt, symTab)
+ if dfa == nil {
+ t.Fatalf("DFA is nil")
+ }
+
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
+ s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
+ s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
+ s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
+
+ rune2Int := func(char rune, index int) uint8 {
+ return uint8([]byte(string(char))[index])
+ }
+
+ tranS0 := [256]string{}
+ tranS0[rune2Int('a', 0)] = s1.hash()
+ tranS0[rune2Int('b', 0)] = s0.hash()
+
+ tranS1 := [256]string{}
+ tranS1[rune2Int('a', 0)] = s1.hash()
+ tranS1[rune2Int('b', 0)] = s2.hash()
+
+ tranS2 := [256]string{}
+ tranS2[rune2Int('a', 0)] = s1.hash()
+ tranS2[rune2Int('b', 0)] = s3.hash()
+
+ tranS3 := [256]string{}
+ tranS3[rune2Int('a', 0)] = s1.hash()
+ tranS3[rune2Int('b', 0)] = s0.hash()
+
+ expectedTranTab := map[string][256]string{
+ s0.hash(): tranS0,
+ s1.hash(): tranS1,
+ s2.hash(): tranS2,
+ s3.hash(): tranS3,
+ }
+ if len(dfa.TransitionTable) != len(expectedTranTab) {
+ t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
+ }
+ for h, eTranTab := range expectedTranTab {
+ tranTab, ok := dfa.TransitionTable[h]
+ if !ok {
+ t.Errorf("no entry; hash: %v", h)
+ continue
+ }
+ if len(tranTab) != len(eTranTab) {
+ t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
+ }
+ for c, eNext := range eTranTab {
+ if eNext == "" {
+ continue
+ }
+
+ next := tranTab[c]
+ if next == "" {
+ t.Errorf("no entry; hash: %v, char: %v", h, c)
+ }
+ if next != eNext {
+ t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next)
+ }
+ }
+ }
+
+ if dfa.InitialState != s0.hash() {
+ t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState)
+ }
+
+ accTab := map[string]spec.LexModeKindID{
+ s3.hash(): 1,
+ }
+ if len(dfa.AcceptingStatesTable) != len(accTab) {
+ t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable))
+ }
+ for eState, eID := range accTab {
+ id, ok := dfa.AcceptingStatesTable[eState]
+ if !ok {
+ t.Errorf("accepting state is not found: state: %v", eState)
+ }
+ if id != eID {
+ t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id)
+ }
+ }
+}
diff --git a/grammar/lexical/dfa/symbol_position.go b/grammar/lexical/dfa/symbol_position.go
new file mode 100644
index 0000000..f154251
--- /dev/null
+++ b/grammar/lexical/dfa/symbol_position.go
@@ -0,0 +1,182 @@
+package dfa
+
+import (
+ "encoding/binary"
+ "fmt"
+ "strings"
+)
+
+type symbolPosition uint16
+
+const (
+ symbolPositionNil symbolPosition = 0x0000
+
+ symbolPositionMin uint16 = 0x0001
+ symbolPositionMax uint16 = 0x7fff
+
+ symbolPositionMaskSymbol uint16 = 0x0000
+ symbolPositionMaskEndMark uint16 = 0x8000
+
+ symbolPositionMaskValue uint16 = 0x7fff
+)
+
+func newSymbolPosition(n uint16, endMark bool) (symbolPosition, error) {
+ if n < symbolPositionMin || n > symbolPositionMax {
+ return symbolPositionNil, fmt.Errorf("symbol position must be within %v to %v: n: %v, endMark: %v", symbolPositionMin, symbolPositionMax, n, endMark)
+ }
+ if endMark {
+ return symbolPosition(n | symbolPositionMaskEndMark), nil
+ }
+ return symbolPosition(n | symbolPositionMaskSymbol), nil
+}
+
+func (p symbolPosition) String() string {
+ if p.isEndMark() {
+ return fmt.Sprintf("end#%v", uint16(p)&symbolPositionMaskValue)
+ }
+ return fmt.Sprintf("sym#%v", uint16(p)&symbolPositionMaskValue)
+}
+
+func (p symbolPosition) isEndMark() bool {
+ return uint16(p)&symbolPositionMaskEndMark != 0
+}
+
+func (p symbolPosition) describe() (uint16, bool) {
+ v := uint16(p) & symbolPositionMaskValue
+ if p.isEndMark() {
+ return v, true
+ }
+ return v, false
+}
+
+type symbolPositionSet struct {
+ // `s` represents a set of symbol positions.
+ // However, the elements may be duplicated immediately after a symbol position is added.
+ // When you need a sorted set with no duplicates, retrieve it via the set method.
+ s []symbolPosition
+ sorted bool
+}
+
+func newSymbolPositionSet() *symbolPositionSet {
+ return &symbolPositionSet{
+ s: []symbolPosition{},
+ sorted: false,
+ }
+}
+
+func (s *symbolPositionSet) String() string {
+ if len(s.s) <= 0 {
+ return "{}"
+ }
+ ps := s.sortAndRemoveDuplicates()
+ var b strings.Builder
+ fmt.Fprintf(&b, "{")
+ for i, p := range ps {
+ if i <= 0 {
+ fmt.Fprintf(&b, "%v", p)
+ continue
+ }
+ fmt.Fprintf(&b, ", %v", p)
+ }
+ fmt.Fprintf(&b, "}")
+ return b.String()
+}
+
+func (s *symbolPositionSet) set() []symbolPosition {
+ s.sortAndRemoveDuplicates()
+ return s.s
+}
+
+func (s *symbolPositionSet) add(pos symbolPosition) *symbolPositionSet {
+ s.s = append(s.s, pos)
+ s.sorted = false
+ return s
+}
+
+func (s *symbolPositionSet) merge(t *symbolPositionSet) *symbolPositionSet {
+ s.s = append(s.s, t.s...)
+ s.sorted = false
+ return s
+}
+
+func (s *symbolPositionSet) hash() string {
+ if len(s.s) <= 0 {
+ return ""
+ }
+ sorted := s.sortAndRemoveDuplicates()
+ var buf []byte
+ for _, p := range sorted {
+ b := make([]byte, 8)
+ binary.PutUvarint(b, uint64(p))
+ buf = append(buf, b...)
+ }
+ // Convert to a string to be able to use it as a key of a map.
+ // But note this byte sequence is made from values of symbol positions,
+ // so this is not a well-formed UTF-8 sequence.
+ return string(buf)
+}
+
+func (s *symbolPositionSet) sortAndRemoveDuplicates() []symbolPosition {
+ if s.sorted {
+ return s.s
+ }
+
+ sortSymbolPositions(s.s, 0, len(s.s)-1)
+
+ // Remove duplicates.
+ lastV := s.s[0]
+ nextIdx := 1
+ for _, v := range s.s[1:] {
+ if v == lastV {
+ continue
+ }
+ s.s[nextIdx] = v
+ nextIdx++
+ lastV = v
+ }
+ s.s = s.s[:nextIdx]
+ s.sorted = true
+
+ return s.s
+}
+
+// sortSymbolPositions sorts a slice of symbol positions using quicksort.
+func sortSymbolPositions(ps []symbolPosition, left, right int) {
+ if left >= right {
+ return
+ }
+ var pivot symbolPosition
+ {
+ // Use a median as a pivot.
+ p1 := ps[left]
+ p2 := ps[(left+right)/2]
+ p3 := ps[right]
+ if p1 > p2 {
+ p1, p2 = p2, p1
+ }
+ if p2 > p3 {
+ p2 = p3
+ if p1 > p2 {
+ p2 = p1
+ }
+ }
+ pivot = p2
+ }
+ i := left
+ j := right
+ for i <= j {
+ for ps[i] < pivot {
+ i++
+ }
+ for ps[j] > pivot {
+ j--
+ }
+ if i <= j {
+ ps[i], ps[j] = ps[j], ps[i]
+ i++
+ j--
+ }
+ }
+ sortSymbolPositions(ps, left, j)
+ sortSymbolPositions(ps, i, right)
+}
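A symbolPosition packs a 15-bit position and an end-mark flag into a single uint16: bit 15 marks end positions and bits 0 to 14 hold the value, which is why valid positions range from 0x0001 to 0x7fff. A self-contained round-trip of that encoding in plain uint16 arithmetic (the constant mirrors symbolPositionMaskEndMark above):

package main

import "fmt"

func main() {
	const endMark uint16 = 0x8000 // same value as symbolPositionMaskEndMark
	p := uint16(42) | endMark // encode position 42 as an end mark
	fmt.Println(p&0x7fff, p&endMark != 0) // prints: 42 true
}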
diff --git a/grammar/lexical/dfa/symbol_position_test.go b/grammar/lexical/dfa/symbol_position_test.go
new file mode 100644
index 0000000..c867f64
--- /dev/null
+++ b/grammar/lexical/dfa/symbol_position_test.go
@@ -0,0 +1,79 @@
+package dfa
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestNewSymbolPosition(t *testing.T) {
+ tests := []struct {
+ n uint16
+ endMark bool
+ err bool
+ }{
+ {
+ n: 0,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: 0,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: true,
+ err: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
+ pos, err := newSymbolPosition(tt.n, tt.endMark)
+ if tt.err {
+ if err == nil {
+ t.Fatal("err is nil")
+ }
+ return
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, endMark := pos.describe()
+ if n != tt.n || endMark != tt.endMark {
+ t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
+ }
+ })
+ }
+}
diff --git a/grammar/lexical/dfa/tree.go b/grammar/lexical/dfa/tree.go
new file mode 100644
index 0000000..cd6081e
--- /dev/null
+++ b/grammar/lexical/dfa/tree.go
@@ -0,0 +1,567 @@
+package dfa
+
+import (
+ "fmt"
+ "io"
+ "sort"
+
+ "github.com/nihei9/vartan/grammar/lexical/parser"
+ spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/utf8"
+)
+
+type byteTree interface {
+ fmt.Stringer
+ children() (byteTree, byteTree)
+ nullable() bool
+ first() *symbolPositionSet
+ last() *symbolPositionSet
+ clone() byteTree
+}
+
+var (
+ _ byteTree = &symbolNode{}
+ _ byteTree = &endMarkerNode{}
+ _ byteTree = &concatNode{}
+ _ byteTree = &altNode{}
+ _ byteTree = &repeatNode{}
+ _ byteTree = &optionNode{}
+)
+
+type byteRange struct {
+ from byte
+ to byte
+}
+
+type symbolNode struct {
+ byteRange
+ pos symbolPosition
+ firstMemo *symbolPositionSet
+ lastMemo *symbolPositionSet
+}
+
+func newSymbolNode(value byte) *symbolNode {
+ return &symbolNode{
+ byteRange: byteRange{
+ from: value,
+ to: value,
+ },
+ pos: symbolPositionNil,
+ }
+}
+
+func newRangeSymbolNode(from, to byte) *symbolNode {
+ return &symbolNode{
+ byteRange: byteRange{
+ from: from,
+ to: to,
+ },
+ pos: symbolPositionNil,
+ }
+}
+
+func (n *symbolNode) String() string {
+ return fmt.Sprintf("symbol: value: %v-%v, pos: %v", n.from, n.to, n.pos)
+}
+
+func (n *symbolNode) children() (byteTree, byteTree) {
+ return nil, nil
+}
+
+func (n *symbolNode) nullable() bool {
+ return false
+}
+
+func (n *symbolNode) first() *symbolPositionSet {
+ if n.firstMemo == nil {
+ n.firstMemo = newSymbolPositionSet()
+ n.firstMemo.add(n.pos)
+ }
+ return n.firstMemo
+}
+
+func (n *symbolNode) last() *symbolPositionSet {
+ if n.lastMemo == nil {
+ n.lastMemo = newSymbolPositionSet()
+ n.lastMemo.add(n.pos)
+ }
+ return n.lastMemo
+}
+
+func (n *symbolNode) clone() byteTree {
+ return newRangeSymbolNode(n.from, n.to)
+}
+
+type endMarkerNode struct {
+ id spec.LexModeKindID
+ pos symbolPosition
+ firstMemo *symbolPositionSet
+ lastMemo *symbolPositionSet
+}
+
+func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode {
+ return &endMarkerNode{
+ id: id,
+ pos: symbolPositionNil,
+ }
+}
+
+func (n *endMarkerNode) String() string {
+ return fmt.Sprintf("end: pos: %v", n.pos)
+}
+
+func (n *endMarkerNode) children() (byteTree, byteTree) {
+ return nil, nil
+}
+
+func (n *endMarkerNode) nullable() bool {
+ return false
+}
+
+func (n *endMarkerNode) first() *symbolPositionSet {
+ if n.firstMemo == nil {
+ n.firstMemo = newSymbolPositionSet()
+ n.firstMemo.add(n.pos)
+ }
+ return n.firstMemo
+}
+
+func (n *endMarkerNode) last() *symbolPositionSet {
+ if n.lastMemo == nil {
+ n.lastMemo = newSymbolPositionSet()
+ n.lastMemo.add(n.pos)
+ }
+ return n.lastMemo
+}
+
+func (n *endMarkerNode) clone() byteTree {
+ return newEndMarkerNode(n.id)
+}
+
+type concatNode struct {
+ left byteTree
+ right byteTree
+ firstMemo *symbolPositionSet
+ lastMemo *symbolPositionSet
+}
+
+func newConcatNode(left, right byteTree) *concatNode {
+ return &concatNode{
+ left: left,
+ right: right,
+ }
+}
+
+func (n *concatNode) String() string {
+ return "concat"
+}
+
+func (n *concatNode) children() (byteTree, byteTree) {
+ return n.left, n.right
+}
+
+func (n *concatNode) nullable() bool {
+ return n.left.nullable() && n.right.nullable()
+}
+
+func (n *concatNode) first() *symbolPositionSet {
+ if n.firstMemo == nil {
+ n.firstMemo = newSymbolPositionSet()
+ n.firstMemo.merge(n.left.first())
+ if n.left.nullable() {
+ n.firstMemo.merge(n.right.first())
+ }
+ n.firstMemo.sortAndRemoveDuplicates()
+ }
+ return n.firstMemo
+}
+
+func (n *concatNode) last() *symbolPositionSet {
+ if n.lastMemo == nil {
+ n.lastMemo = newSymbolPositionSet()
+ n.lastMemo.merge(n.right.last())
+ if n.right.nullable() {
+ n.lastMemo.merge(n.left.last())
+ }
+ n.lastMemo.sortAndRemoveDuplicates()
+ }
+ return n.lastMemo
+}
+
+func (n *concatNode) clone() byteTree {
+ return newConcatNode(n.left.clone(), n.right.clone())
+}
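+
+// For intuition (hypothetical positions): in the tree for a*b, with a at
+// position 1 and b at position 2, first(a*b) = {1, 2} because a* is nullable,
+// while last(a*b) = {2} because b is not.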
+
+type altNode struct {
+ left byteTree
+ right byteTree
+ firstMemo *symbolPositionSet
+ lastMemo *symbolPositionSet
+}
+
+func newAltNode(left, right byteTree) *altNode {
+ return &altNode{
+ left: left,
+ right: right,
+ }
+}
+
+func (n *altNode) String() string {
+ return "alt"
+}
+
+func (n *altNode) children() (byteTree, byteTree) {
+ return n.left, n.right
+}
+
+func (n *altNode) nullable() bool {
+ return n.left.nullable() || n.right.nullable()
+}
+
+func (n *altNode) first() *symbolPositionSet {
+ if n.firstMemo == nil {
+ n.firstMemo = newSymbolPositionSet()
+ n.firstMemo.merge(n.left.first())
+ n.firstMemo.merge(n.right.first())
+ n.firstMemo.sortAndRemoveDuplicates()
+ }
+ return n.firstMemo
+}
+
+func (n *altNode) last() *symbolPositionSet {
+ if n.lastMemo == nil {
+ n.lastMemo = newSymbolPositionSet()
+ n.lastMemo.merge(n.left.last())
+ n.lastMemo.merge(n.right.last())
+ n.lastMemo.sortAndRemoveDuplicates()
+ }
+ return n.lastMemo
+}
+
+func (n *altNode) clone() byteTree {
+ return newAltNode(n.left.clone(), n.right.clone())
+}
+
+type repeatNode struct {
+ left byteTree
+ firstMemo *symbolPositionSet
+ lastMemo *symbolPositionSet
+}
+
+func newRepeatNode(left byteTree) *repeatNode {
+ return &repeatNode{
+ left: left,
+ }
+}
+
+func (n *repeatNode) String() string {
+ return "repeat"
+}
+
+func (n *repeatNode) children() (byteTree, byteTree) {
+ return n.left, nil
+}
+
+func (n *repeatNode) nullable() bool {
+ return true
+}
+
+func (n *repeatNode) first() *symbolPositionSet {
+ if n.firstMemo == nil {
+ n.firstMemo = newSymbolPositionSet()
+ n.firstMemo.merge(n.left.first())
+ n.firstMemo.sortAndRemoveDuplicates()
+ }
+ return n.firstMemo
+}
+
+func (n *repeatNode) last() *symbolPositionSet {
+ if n.lastMemo == nil {
+ n.lastMemo = newSymbolPositionSet()
+ n.lastMemo.merge(n.left.last())
+ n.lastMemo.sortAndRemoveDuplicates()
+ }
+ return n.lastMemo
+}
+
+func (n *repeatNode) clone() byteTree {
+ return newRepeatNode(n.left.clone())
+}
+
+type optionNode struct {
+ left byteTree
+ firstMemo *symbolPositionSet
+ lastMemo *symbolPositionSet
+}
+
+func newOptionNode(left byteTree) *optionNode {
+ return &optionNode{
+ left: left,
+ }
+}
+
+func (n *optionNode) String() string {
+ return "option"
+}
+
+func (n *optionNode) children() (byteTree, byteTree) {
+ return n.left, nil
+}
+
+func (n *optionNode) nullable() bool {
+ return true
+}
+
+func (n *optionNode) first() *symbolPositionSet {
+ if n.firstMemo == nil {
+ n.firstMemo = newSymbolPositionSet()
+ n.firstMemo.merge(n.left.first())
+ n.firstMemo.sortAndRemoveDuplicates()
+ }
+ return n.firstMemo
+}
+
+func (n *optionNode) last() *symbolPositionSet {
+ if n.lastMemo == nil {
+ n.lastMemo = newSymbolPositionSet()
+ n.lastMemo.merge(n.left.last())
+ n.lastMemo.sortAndRemoveDuplicates()
+ }
+ return n.lastMemo
+}
+
+func (n *optionNode) clone() byteTree {
+ return newOptionNode(n.left.clone())
+}
+
+type followTable map[symbolPosition]*symbolPositionSet
+
+func genFollowTable(root byteTree) followTable {
+ follow := followTable{}
+ calcFollow(follow, root)
+ return follow
+}
+
+func calcFollow(follow followTable, ast byteTree) {
+ if ast == nil {
+ return
+ }
+ left, right := ast.children()
+ calcFollow(follow, left)
+ calcFollow(follow, right)
+ switch n := ast.(type) {
+ case *concatNode:
+ l, r := n.children()
+ for _, p := range l.last().set() {
+ if _, ok := follow[p]; !ok {
+ follow[p] = newSymbolPositionSet()
+ }
+ follow[p].merge(r.first())
+ }
+ case *repeatNode:
+ for _, p := range n.last().set() {
+ if _, ok := follow[p]; !ok {
+ follow[p] = newSymbolPositionSet()
+ }
+ follow[p].merge(n.first())
+ }
+ }
+}
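+
+// For example, for the pattern (a|b)*abb with symbol positions 1..5 assigned
+// left to right and the end marker at position 6 (the layout used by the tests
+// in tree_test.go), the resulting follow table is:
+//
+//	follow(1) = follow(2) = {1, 2, 3}
+//	follow(3) = {4}
+//	follow(4) = {5}
+//	follow(5) = {6}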
+
+func positionSymbols(node byteTree, n uint16) (uint16, error) {
+ if node == nil {
+ return n, nil
+ }
+
+ l, r := node.children()
+ p := n
+ p, err := positionSymbols(l, p)
+ if err != nil {
+ return p, err
+ }
+ p, err = positionSymbols(r, p)
+ if err != nil {
+ return p, err
+ }
+ switch n := node.(type) {
+ case *symbolNode:
+ n.pos, err = newSymbolPosition(p, false)
+ if err != nil {
+ return p, err
+ }
+ p++
+ case *endMarkerNode:
+ n.pos, err = newSymbolPosition(p, true)
+ if err != nil {
+ return p, err
+ }
+ p++
+ }
+ node.first()
+ node.last()
+ return p, nil
+}
+
+func concat(ts ...byteTree) byteTree {
+ nonNilNodes := []byteTree{}
+ for _, t := range ts {
+ if t == nil {
+ continue
+ }
+ nonNilNodes = append(nonNilNodes, t)
+ }
+ if len(nonNilNodes) <= 0 {
+ return nil
+ }
+ if len(nonNilNodes) == 1 {
+ return nonNilNodes[0]
+ }
+ concat := newConcatNode(nonNilNodes[0], nonNilNodes[1])
+ for _, t := range nonNilNodes[2:] {
+ concat = newConcatNode(concat, t)
+ }
+ return concat
+}
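+
+// For example, concat(a, nil, b, c) skips the nil operand and builds the
+// left-leaning tree newConcatNode(newConcatNode(a, b), c); zero or one non-nil
+// operand yields nil or the operand itself.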
+
+func oneOf(ts ...byteTree) byteTree {
+ nonNilNodes := []byteTree{}
+ for _, t := range ts {
+ if t == nil {
+ continue
+ }
+ nonNilNodes = append(nonNilNodes, t)
+ }
+ if len(nonNilNodes) <= 0 {
+ return nil
+ }
+ if len(nonNilNodes) == 1 {
+ return nonNilNodes[0]
+ }
+ alt := newAltNode(nonNilNodes[0], nonNilNodes[1])
+ for _, t := range nonNilNodes[2:] {
+ alt = newAltNode(alt, t)
+ }
+ return alt
+}
+
+//nolint:unused
+func printByteTree(w io.Writer, t byteTree, ruledLine string, childRuledLinePrefix string, withAttrs bool) {
+ if t == nil {
+ return
+ }
+ fmt.Fprintf(w, "%v%v", ruledLine, t)
+ if withAttrs {
+ fmt.Fprintf(w, ", nullable: %v, first: %v, last: %v", t.nullable(), t.first(), t.last())
+ }
+ fmt.Fprintf(w, "\n")
+ left, right := t.children()
+ children := []byteTree{}
+ if left != nil {
+ children = append(children, left)
+ }
+ if right != nil {
+ children = append(children, right)
+ }
+ num := len(children)
+ for i, child := range children {
+ line := "└─ "
+ if num > 1 {
+ if i == 0 {
+ line = "├─ "
+ } else if i < num-1 {
+ line = "│ "
+ }
+ }
+ prefix := "│ "
+ if i >= num-1 {
+ prefix = " "
+ }
+ printByteTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix, withAttrs)
+ }
+}
+
+func ConvertCPTreeToByteTree(cpTrees map[spec.LexModeKindID]parser.CPTree) (byteTree, *symbolTable, error) {
+ var ids []spec.LexModeKindID
+ for id := range cpTrees {
+ ids = append(ids, id)
+ }
+ sort.Slice(ids, func(i, j int) bool {
+ return ids[i] < ids[j]
+ })
+
+ var bt byteTree
+ for _, id := range ids {
+ cpTree := cpTrees[id]
+ t, err := convCPTreeToByteTree(cpTree)
+ if err != nil {
+ return nil, nil, err
+ }
+ bt = oneOf(bt, concat(t, newEndMarkerNode(id)))
+ }
+ _, err := positionSymbols(bt, symbolPositionMin)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ return bt, genSymbolTable(bt), nil
+}
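+
+// A minimal usage sketch (error handling elided; the kind name and pattern are
+// hypothetical):
+//
+//	p := parser.NewParser(spec.LexKindName("int"), strings.NewReader("[0-9]+"))
+//	cpt, _ := p.Parse()
+//	root, symTab, _ := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+//		spec.LexModeKindIDMin: cpt,
+//	})
+//	_, _ = root, symTab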
+
+func convCPTreeToByteTree(cpTree parser.CPTree) (byteTree, error) {
+ if from, to, ok := cpTree.Range(); ok {
+ bs, err := utf8.GenCharBlocks(from, to)
+ if err != nil {
+ return nil, err
+ }
+ var a byteTree
+ for _, b := range bs {
+ var c byteTree
+ for i := 0; i < len(b.From); i++ {
+ c = concat(c, newRangeSymbolNode(b.From[i], b.To[i]))
+ }
+ a = oneOf(a, c)
+ }
+ return a, nil
+ }
+
+ if tree, ok := cpTree.Repeatable(); ok {
+ t, err := convCPTreeToByteTree(tree)
+ if err != nil {
+ return nil, err
+ }
+ return newRepeatNode(t), nil
+ }
+
+ if tree, ok := cpTree.Optional(); ok {
+ t, err := convCPTreeToByteTree(tree)
+ if err != nil {
+ return nil, err
+ }
+ return newOptionNode(t), nil
+ }
+
+ if left, right, ok := cpTree.Concatenation(); ok {
+ l, err := convCPTreeToByteTree(left)
+ if err != nil {
+ return nil, err
+ }
+ r, err := convCPTreeToByteTree(right)
+ if err != nil {
+ return nil, err
+ }
+ return newConcatNode(l, r), nil
+ }
+
+ if left, right, ok := cpTree.Alternatives(); ok {
+ l, err := convCPTreeToByteTree(left)
+ if err != nil {
+ return nil, err
+ }
+ r, err := convCPTreeToByteTree(right)
+ if err != nil {
+ return nil, err
+ }
+ return newAltNode(l, r), nil
+ }
+
+ return nil, fmt.Errorf("invalid tree type: %T", cpTree)
+}
diff --git a/grammar/lexical/dfa/tree_test.go b/grammar/lexical/dfa/tree_test.go
new file mode 100644
index 0000000..e0abe64
--- /dev/null
+++ b/grammar/lexical/dfa/tree_test.go
@@ -0,0 +1,257 @@
+package dfa
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/grammar/lexical/parser"
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+func TestByteTree(t *testing.T) {
+ tests := []struct {
+ root byteTree
+ nullable bool
+ first *symbolPositionSet
+ last *symbolPositionSet
+ }{
+ {
+ root: newSymbolNodeWithPos(0, 1),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newEndMarkerNodeWithPos(1, 1),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newConcatNode(
+ newSymbolNodeWithPos(0, 1),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(2),
+ },
+ {
+ root: newConcatNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(2),
+ },
+ {
+ root: newConcatNode(
+ newSymbolNodeWithPos(0, 1),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newConcatNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newSymbolNodeWithPos(0, 1),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newSymbolNodeWithPos(0, 1),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ nullable: true,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newOptionNode(newSymbolNodeWithPos(0, 1)),
+ nullable: true,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ if tt.root.nullable() != tt.nullable {
+ t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable())
+ }
+ if tt.first.hash() != tt.root.first().hash() {
+ t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first())
+ }
+ if tt.last.hash() != tt.root.last().hash() {
+ t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last())
+ }
+ })
+ }
+}
+
+func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode {
+ n := newSymbolNode(v)
+ n.pos = pos
+ return n
+}
+
+func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode {
+ n := newEndMarkerNode(spec.LexModeKindID(id))
+ n.pos = pos
+ return n
+}
+
+func TestFollowAndSymbolTable(t *testing.T) {
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ {
+ followTab := genFollowTable(bt)
+ if followTab == nil {
+ t.Fatal("follow table is nil")
+ }
+ expectedFollowTab := followTable{
+ 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 3: newSymbolPositionSet().add(symPos(4)),
+ 4: newSymbolPositionSet().add(symPos(5)),
+ 5: newSymbolPositionSet().add(endPos(6)),
+ }
+ testFollowTable(t, expectedFollowTab, followTab)
+ }
+
+ {
+ entry := func(v byte) byteRange {
+ return byteRange{
+ from: v,
+ to: v,
+ }
+ }
+
+ expectedSymTab := &symbolTable{
+ symPos2Byte: map[symbolPosition]byteRange{
+ symPos(1): entry(byte('a')),
+ symPos(2): entry(byte('b')),
+ symPos(3): entry(byte('a')),
+ symPos(4): entry(byte('b')),
+ symPos(5): entry(byte('b')),
+ },
+ endPos2ID: map[symbolPosition]spec.LexModeKindID{
+ endPos(6): 1,
+ },
+ }
+ testSymbolTable(t, expectedSymTab, symTab)
+ }
+}
+
+func testFollowTable(t *testing.T, expected, actual followTable) {
+ if len(actual) != len(expected) {
+ t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual))
+ }
+ for ePos, eSet := range expected {
+ aSet, ok := actual[ePos]
+ if !ok {
+ t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet)
+ }
+ if aSet.hash() != eSet.hash() {
+ t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet)
+ }
+ }
+}
+
+func testSymbolTable(t *testing.T, expected, actual *symbolTable) {
+ t.Helper()
+
+ if len(actual.symPos2Byte) != len(expected.symPos2Byte) {
+ t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte))
+ }
+ for ePos, eByte := range expected.symPos2Byte {
+ aByte, ok := actual.symPos2Byte[ePos]
+ if !ok {
+ t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte)
+ continue
+ }
+ if aByte.from != eByte.from || aByte.to != eByte.to {
+ t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, aByte)
+ }
+ }
+
+ if len(actual.endPos2ID) != len(expected.endPos2ID) {
+ t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID))
+ }
+ for ePos, eID := range expected.endPos2ID {
+ id, ok := actual.endPos2ID[ePos]
+ if !ok {
+ t.Errorf("an end position entry is not found: %v -> %v", ePos, eID)
+ continue
+ }
+ if id != eID {
+ t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id)
+ }
+ }
+}
diff --git a/grammar/lexical/entry.go b/grammar/lexical/entry.go
new file mode 100644
index 0000000..c9f8691
--- /dev/null
+++ b/grammar/lexical/entry.go
@@ -0,0 +1,171 @@
+package lexical
+
+import (
+ "fmt"
+ "sort"
+ "strings"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+type LexEntry struct {
+ Kind spec.LexKindName
+ Pattern string
+ Modes []spec.LexModeName
+ Push spec.LexModeName
+ Pop bool
+ Fragment bool
+}
+
+type LexSpec struct {
+ Entries []*LexEntry
+}
+
+func (s *LexSpec) Validate() error {
+ if len(s.Entries) <= 0 {
+ return fmt.Errorf("the lexical specification must have at least one entry")
+ }
+ {
+ ks := map[string]struct{}{}
+ fks := map[string]struct{}{}
+ for _, e := range s.Entries {
+ // Allow duplicate names between fragments and non-fragments.
+ if e.Fragment {
+ if _, exist := fks[e.Kind.String()]; exist {
+ return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
+ }
+ fks[e.Kind.String()] = struct{}{}
+ } else {
+ if _, exist := ks[e.Kind.String()]; exist {
+ return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
+ }
+ ks[e.Kind.String()] = struct{}{}
+ }
+ }
+ }
+ {
+ kinds := []string{}
+ modes := []string{
+ spec.LexModeNameDefault.String(), // This is a predefined mode.
+ }
+ for _, e := range s.Entries {
+ if e.Fragment {
+ continue
+ }
+
+ kinds = append(kinds, e.Kind.String())
+
+ for _, m := range e.Modes {
+ modes = append(modes, m.String())
+ }
+ }
+
+ kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
+ modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
+ if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(spec.LexModeNameDefault.String()) {
+ var b strings.Builder
+ fmt.Fprintf(&b, "%+v", ids[0])
+ for _, id := range ids[1:] {
+ fmt.Fprintf(&b, ", %+v", id)
+ }
+ return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", spec.LexModeNameDefault, b.String())
+ }
+ return nil
+ })
+ errs := append(kindErrs, modeErrs...)
+ if len(errs) > 0 {
+ var b strings.Builder
+ fmt.Fprintf(&b, "%v", errs[0])
+ for _, err := range errs[1:] {
+ fmt.Fprintf(&b, "\n%v", err)
+ }
+ return fmt.Errorf("%v", b.String())
+ }
+ }
+
+ return nil
+}
+
+func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
+ duplicated := FindSpellingInconsistencies(ids)
+ if len(duplicated) == 0 {
+ return nil
+ }
+
+ var errs []error
+ for _, dup := range duplicated {
+ if hook != nil {
+ err := hook(dup)
+ if err != nil {
+ errs = append(errs, err)
+ continue
+ }
+ }
+
+ var b strings.Builder
+ fmt.Fprintf(&b, "%+v", dup[0])
+ for _, id := range dup[1:] {
+ fmt.Fprintf(&b, ", %+v", id)
+ }
+ err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
+ errs = append(errs, err)
+ }
+
+ return errs
+}
+
+// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. Two identifiers are considered the same
+// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled
+// the same in UpperCamelCase, so they are considered a spelling inconsistency.
+func FindSpellingInconsistencies(ids []string) [][]string {
+ m := map[string][]string{}
+ for _, id := range removeDuplicates(ids) {
+ c := SnakeCaseToUpperCamelCase(id)
+ m[c] = append(m[c], id)
+ }
+
+ var duplicated [][]string
+ for _, camels := range m {
+ if len(camels) == 1 {
+ continue
+ }
+ duplicated = append(duplicated, camels)
+ }
+
+ for _, dup := range duplicated {
+ sort.Slice(dup, func(i, j int) bool {
+ return dup[i] < dup[j]
+ })
+ }
+ sort.Slice(duplicated, func(i, j int) bool {
+ return duplicated[i][0] < duplicated[j][0]
+ })
+
+ return duplicated
+}
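+
+// For example (hypothetical input):
+//
+//	FindSpellingInconsistencies([]string{"left_paren", "LeftParen", "int"})
+//	// => [][]string{{"LeftParen", "left_paren"}}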
+
+func removeDuplicates(s []string) []string {
+ m := map[string]struct{}{}
+ for _, v := range s {
+ m[v] = struct{}{}
+ }
+
+ var unique []string
+ for v := range m {
+ unique = append(unique, v)
+ }
+
+ return unique
+}
+
+func SnakeCaseToUpperCamelCase(snake string) string {
+ elems := strings.Split(snake, "_")
+ for i, e := range elems {
+ if len(e) == 0 {
+ continue
+ }
+ elems[i] = strings.ToUpper(string(e[0])) + e[1:]
+ }
+
+ return strings.Join(elems, "")
+}
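+
+// For example, SnakeCaseToUpperCamelCase("left_paren") returns "LeftParen";
+// an input that is already camel-cased, such as "LeftParen", is returned
+// unchanged.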
diff --git a/grammar/lexical/parser/error.go b/grammar/lexical/parser/error.go
new file mode 100644
index 0000000..be81da4
--- /dev/null
+++ b/grammar/lexical/parser/error.go
@@ -0,0 +1,36 @@
+package parser
+
+import "fmt"
+
+var (
+ ParseErr = fmt.Errorf("parse error")
+
+ // lexical errors
+ synErrIncompletedEscSeq = fmt.Errorf("incomplete escape sequence; unexpected EOF following \\")
+ synErrInvalidEscSeq = fmt.Errorf("invalid escape sequence")
+ synErrInvalidCodePoint = fmt.Errorf("a code point must consist of exactly 4 or 6 hex digits")
+ synErrCharPropInvalidSymbol = fmt.Errorf("invalid character property symbol")
+ SynErrFragmentInvalidSymbol = fmt.Errorf("invalid fragment symbol")
+
+ // syntax errors
+ synErrUnexpectedToken = fmt.Errorf("unexpected token")
+ synErrNullPattern = fmt.Errorf("a pattern must be a non-empty byte sequence")
+ synErrUnmatchablePattern = fmt.Errorf("a pattern cannot match any characters")
+ synErrAltLackOfOperand = fmt.Errorf("an alternation expression must have operands")
+ synErrRepNoTarget = fmt.Errorf("a repeat expression must have an operand")
+ synErrGroupNoElem = fmt.Errorf("a grouping expression must include at least one character")
+ synErrGroupUnclosed = fmt.Errorf("unclosed grouping expression")
+ synErrGroupNoInitiator = fmt.Errorf(") needs preceding (")
+ synErrGroupInvalidForm = fmt.Errorf("invalid grouping expression")
+ synErrBExpNoElem = fmt.Errorf("a bracket expression must include at least one character")
+ synErrBExpUnclosed = fmt.Errorf("unclosed bracket expression")
+ synErrBExpInvalidForm = fmt.Errorf("invalid bracket expression")
+ synErrRangeInvalidOrder = fmt.Errorf("a range expression with invalid order")
+ synErrRangePropIsUnavailable = fmt.Errorf("a property expression is unavailable in a range expression")
+ synErrRangeInvalidForm = fmt.Errorf("invalid range expression")
+ synErrCPExpInvalidForm = fmt.Errorf("invalid code point expression")
+ synErrCPExpOutOfRange = fmt.Errorf("a code point must be between U+0000 and U+10FFFF")
+ synErrCharPropExpInvalidForm = fmt.Errorf("invalid character property expression")
+ synErrCharPropUnsupported = fmt.Errorf("unsupported character property")
+ synErrFragmentExpInvalidForm = fmt.Errorf("invalid fragment expression")
+)
diff --git a/grammar/lexical/parser/fragment.go b/grammar/lexical/parser/fragment.go
new file mode 100644
index 0000000..fc6f16b
--- /dev/null
+++ b/grammar/lexical/parser/fragment.go
@@ -0,0 +1,72 @@
+package parser
+
+import (
+ "fmt"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+type incompleteFragment struct {
+ kind spec.LexKindName
+ root *rootNode
+}
+
+func CompleteFragments(fragments map[spec.LexKindName]CPTree) error {
+ if len(fragments) == 0 {
+ return nil
+ }
+
+ completeFragments := map[spec.LexKindName]CPTree{}
+ incompleteFragments := []*incompleteFragment{}
+ for kind, tree := range fragments {
+ root, ok := tree.(*rootNode)
+ if !ok {
+ return fmt.Errorf("CompleteFragments can take only *rootNode: %T", tree)
+ }
+ if root.incomplete() {
+ incompleteFragments = append(incompleteFragments, &incompleteFragment{
+ kind: kind,
+ root: root,
+ })
+ } else {
+ completeFragments[kind] = root
+ }
+ }
+ for len(incompleteFragments) > 0 {
+ lastIncompCount := len(incompleteFragments)
+ remainingFragments := []*incompleteFragment{}
+ for _, e := range incompleteFragments {
+ complete, err := ApplyFragments(e.root, completeFragments)
+ if err != nil {
+ return err
+ }
+ if !complete {
+ remainingFragments = append(remainingFragments, e)
+ } else {
+ completeFragments[e.kind] = e.root
+ }
+ }
+ incompleteFragments = remainingFragments
+ // If no fragment became complete in this pass, the remaining ones must
+ // contain an undefined or cyclic fragment reference.
+ if len(incompleteFragments) == lastIncompCount {
+ return ParseErr
+ }
+ }
+
+ return nil
+}
+
+func ApplyFragments(t CPTree, fragments map[spec.LexKindName]CPTree) (bool, error) {
+ root, ok := t.(*rootNode)
+ if !ok {
+ return false, fmt.Errorf("ApplyFragments can take only *rootNode type: %T", t)
+ }
+
+ for name, frag := range fragments {
+ err := root.applyFragment(name, frag)
+ if err != nil {
+ return false, err
+ }
+ }
+
+ return !root.incomplete(), nil
+}
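+
+// A minimal usage sketch (the kind names and trees are hypothetical):
+//
+//	frags := map[spec.LexKindName]CPTree{
+//		"digit":   digitTree,   // contains no \f{...} references
+//		"integer": integerTree, // refers to \f{digit}
+//	}
+//	if err := CompleteFragments(frags); err != nil {
+//		// an undefined or cyclic fragment reference was found
+//	}
+//	complete, err := ApplyFragments(patternTree, frags)
+//	_, _ = complete, err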
diff --git a/grammar/lexical/parser/lexer.go b/grammar/lexical/parser/lexer.go
new file mode 100644
index 0000000..3861825
--- /dev/null
+++ b/grammar/lexical/parser/lexer.go
@@ -0,0 +1,594 @@
+package parser
+
+import (
+ "bufio"
+ "fmt"
+ "io"
+ "strings"
+)
+
+type tokenKind string
+
+const (
+ tokenKindChar tokenKind = "char"
+ tokenKindAnyChar tokenKind = "."
+ tokenKindRepeat tokenKind = "*"
+ tokenKindRepeatOneOrMore tokenKind = "+"
+ tokenKindOption tokenKind = "?"
+ tokenKindAlt tokenKind = "|"
+ tokenKindGroupOpen tokenKind = "("
+ tokenKindGroupClose tokenKind = ")"
+ tokenKindBExpOpen tokenKind = "["
+ tokenKindInverseBExpOpen tokenKind = "[^"
+ tokenKindBExpClose tokenKind = "]"
+ tokenKindCharRange tokenKind = "-"
+ tokenKindCodePointLeader tokenKind = "\\u"
+ tokenKindCharPropLeader tokenKind = "\\p"
+ tokenKindFragmentLeader tokenKind = "\\f"
+ tokenKindLBrace tokenKind = "{"
+ tokenKindRBrace tokenKind = "}"
+ tokenKindEqual tokenKind = "="
+ tokenKindCodePoint tokenKind = "code point"
+ tokenKindCharPropSymbol tokenKind = "character property symbol"
+ tokenKindFragmentSymbol tokenKind = "fragment symbol"
+ tokenKindEOF tokenKind = "eof"
+)
+
+type token struct {
+ kind tokenKind
+ char rune
+ propSymbol string
+ codePoint string
+ fragmentSymbol string
+}
+
+const nullChar = '\u0000'
+
+func newToken(kind tokenKind, char rune) *token {
+ return &token{
+ kind: kind,
+ char: char,
+ }
+}
+
+func newCodePointToken(codePoint string) *token {
+ return &token{
+ kind: tokenKindCodePoint,
+ codePoint: codePoint,
+ }
+}
+
+func newCharPropSymbolToken(propSymbol string) *token {
+ return &token{
+ kind: tokenKindCharPropSymbol,
+ propSymbol: propSymbol,
+ }
+}
+
+func newFragmentSymbolToken(fragmentSymbol string) *token {
+ return &token{
+ kind: tokenKindFragmentSymbol,
+ fragmentSymbol: fragmentSymbol,
+ }
+}
+
+type lexerMode string
+
+const (
+ lexerModeDefault lexerMode = "default"
+ lexerModeBExp lexerMode = "bracket expression"
+ lexerModeCPExp lexerMode = "code point expression"
+ lexerModeCharPropExp lexerMode = "character property expression"
+ lexerModeFragmentExp lexerMode = "fragment expression"
+)
+
+type lexerModeStack struct {
+ stack []lexerMode
+}
+
+func newLexerModeStack() *lexerModeStack {
+ return &lexerModeStack{
+ stack: []lexerMode{
+ lexerModeDefault,
+ },
+ }
+}
+
+func (s *lexerModeStack) top() lexerMode {
+ return s.stack[len(s.stack)-1]
+}
+
+func (s *lexerModeStack) push(m lexerMode) {
+ s.stack = append(s.stack, m)
+}
+
+func (s *lexerModeStack) pop() {
+ s.stack = s.stack[:len(s.stack)-1]
+}
+
+type rangeState string
+
+// [a-z]
+// ^^^^
+// |||`-- ready
+// ||`-- expect range terminator
+// |`-- read range initiator
+// `-- ready
+const (
+ rangeStateReady rangeState = "ready"
+ rangeStateReadRangeInitiator rangeState = "read range initiator"
+ rangeStateExpectRangeTerminator rangeState = "expect range terminator"
+)
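+
+// For example, while lexing [a-z], the state moves: ready -> read range
+// initiator (after 'a') -> expect range terminator (after '-') -> ready
+// (after 'z').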
+
+type lexer struct {
+ src *bufio.Reader
+ peekChar2 rune
+ peekEOF2 bool
+ peekChar1 rune
+ peekEOF1 bool
+ lastChar rune
+ reachedEOF bool
+ prevChar1 rune
+ prevEOF1 bool
+ prevChar2 rune
+ prevEOF2 bool
+ modeStack *lexerModeStack
+ rangeState rangeState
+
+ errCause error
+ errDetail string
+}
+
+func newLexer(src io.Reader) *lexer {
+ return &lexer{
+ src: bufio.NewReader(src),
+ peekChar2: nullChar,
+ peekEOF2: false,
+ peekChar1: nullChar,
+ peekEOF1: false,
+ lastChar: nullChar,
+ reachedEOF: false,
+ prevChar1: nullChar,
+ prevEOF1: false,
+ prevChar2: nullChar,
+ prevEOF2: false,
+ modeStack: newLexerModeStack(),
+ rangeState: rangeStateReady,
+ }
+}
+
+func (l *lexer) error() (string, error) {
+ return l.errDetail, l.errCause
+}
+
+func (l *lexer) next() (*token, error) {
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ return newToken(tokenKindEOF, nullChar), nil
+ }
+
+ switch l.modeStack.top() {
+ case lexerModeBExp:
+ tok, err := l.nextInBExp(c)
+ if err != nil {
+ return nil, err
+ }
+ if tok.kind == tokenKindChar || tok.kind == tokenKindCodePointLeader || tok.kind == tokenKindCharPropLeader {
+ switch l.rangeState {
+ case rangeStateReady:
+ l.rangeState = rangeStateReadRangeInitiator
+ case rangeStateExpectRangeTerminator:
+ l.rangeState = rangeStateReady
+ }
+ }
+ switch tok.kind {
+ case tokenKindBExpClose:
+ l.modeStack.pop()
+ case tokenKindCharRange:
+ l.rangeState = rangeStateExpectRangeTerminator
+ case tokenKindCodePointLeader:
+ l.modeStack.push(lexerModeCPExp)
+ case tokenKindCharPropLeader:
+ l.modeStack.push(lexerModeCharPropExp)
+ }
+ return tok, nil
+ case lexerModeCPExp:
+ tok, err := l.nextInCodePoint(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindRBrace:
+ l.modeStack.pop()
+ }
+ return tok, nil
+ case lexerModeCharPropExp:
+ tok, err := l.nextInCharProp(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindRBrace:
+ l.modeStack.pop()
+ }
+ return tok, nil
+ case lexerModeFragmentExp:
+ tok, err := l.nextInFragment(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindRBrace:
+ l.modeStack.pop()
+ }
+ return tok, nil
+ default:
+ tok, err := l.nextInDefault(c)
+ if err != nil {
+ return nil, err
+ }
+ switch tok.kind {
+ case tokenKindBExpOpen:
+ l.modeStack.push(lexerModeBExp)
+ l.rangeState = rangeStateReady
+ case tokenKindInverseBExpOpen:
+ l.modeStack.push(lexerModeBExp)
+ l.rangeState = rangeStateReady
+ case tokenKindCodePointLeader:
+ l.modeStack.push(lexerModeCPExp)
+ case tokenKindCharPropLeader:
+ l.modeStack.push(lexerModeCharPropExp)
+ case tokenKindFragmentLeader:
+ l.modeStack.push(lexerModeFragmentExp)
+ }
+ return tok, nil
+ }
+}
+
+func (l *lexer) nextInDefault(c rune) (*token, error) {
+ switch c {
+ case '*':
+ return newToken(tokenKindRepeat, nullChar), nil
+ case '+':
+ return newToken(tokenKindRepeatOneOrMore, nullChar), nil
+ case '?':
+ return newToken(tokenKindOption, nullChar), nil
+ case '.':
+ return newToken(tokenKindAnyChar, nullChar), nil
+ case '|':
+ return newToken(tokenKindAlt, nullChar), nil
+ case '(':
+ return newToken(tokenKindGroupOpen, nullChar), nil
+ case ')':
+ return newToken(tokenKindGroupClose, nullChar), nil
+ case '[':
+ c1, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindBExpOpen, nullChar), nil
+ }
+ if c1 != '^' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindBExpOpen, nullChar), nil
+ }
+ c2, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindInverseBExpOpen, nullChar), nil
+ }
+ if c2 != ']' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindInverseBExpOpen, nullChar), nil
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindBExpOpen, nullChar), nil
+ case '\\':
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ l.errCause = synErrIncompletedEscSeq
+ return nil, ParseErr
+ }
+ if c == 'u' {
+ return newToken(tokenKindCodePointLeader, nullChar), nil
+ }
+ if c == 'p' {
+ return newToken(tokenKindCharPropLeader, nullChar), nil
+ }
+ if c == 'f' {
+ return newToken(tokenKindFragmentLeader, nullChar), nil
+ }
+ if c == '\\' || c == '.' || c == '*' || c == '+' || c == '?' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']' {
+ return newToken(tokenKindChar, c), nil
+ }
+ l.errCause = synErrInvalidEscSeq
+ l.errDetail = fmt.Sprintf("\\%v is not supported", string(c))
+ return nil, ParseErr
+ default:
+ return newToken(tokenKindChar, c), nil
+ }
+}
+
+func (l *lexer) nextInBExp(c rune) (*token, error) {
+ switch c {
+ case '-':
+ if l.rangeState != rangeStateReadRangeInitiator {
+ return newToken(tokenKindChar, c), nil
+ }
+ c1, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindChar, c), nil
+ }
+ if c1 != ']' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindCharRange, nullChar), nil
+ }
+ err = l.restore()
+ if err != nil {
+ return nil, err
+ }
+ return newToken(tokenKindChar, c), nil
+ case ']':
+ return newToken(tokenKindBExpClose, nullChar), nil
+ case '\\':
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ l.errCause = synErrIncompletedEscSeq
+ return nil, ParseErr
+ }
+ if c == 'u' {
+ return newToken(tokenKindCodePointLeader, nullChar), nil
+ }
+ if c == 'p' {
+ return newToken(tokenKindCharPropLeader, nullChar), nil
+ }
+ if c == '\\' || c == '^' || c == '-' || c == ']' {
+ return newToken(tokenKindChar, c), nil
+ }
+ l.errCause = synErrInvalidEscSeq
+ l.errDetail = fmt.Sprintf("\\%v is not supported in a bracket expression", string(c))
+ return nil, ParseErr
+ default:
+ return newToken(tokenKindChar, c), nil
+ }
+}
+
+func (l *lexer) nextInCodePoint(c rune) (*token, error) {
+ switch c {
+ case '{':
+ return newToken(tokenKindLBrace, nullChar), nil
+ case '}':
+ return newToken(tokenKindRBrace, nullChar), nil
+ default:
+ if !isHexDigit(c) {
+ l.errCause = synErrInvalidCodePoint
+ return nil, ParseErr
+ }
+ var b strings.Builder
+ fmt.Fprint(&b, string(c))
+ n := 1
+ for {
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ if c == '}' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ if !isHexDigit(c) || n >= 6 {
+ l.errCause = synErrInvalidCodePoint
+ return nil, ParseErr
+ }
+ fmt.Fprint(&b, string(c))
+ n++
+ }
+ cp := b.String()
+ cpLen := len(cp)
+ if !(cpLen == 4 || cpLen == 6) {
+ l.errCause = synErrInvalidCodePoint
+ return nil, ParseErr
+ }
+ return newCodePointToken(b.String()), nil
+ }
+}
+
+func isHexDigit(c rune) bool {
+ if c >= '0' && c <= '9' || c >= 'A' && c <= 'F' || c >= 'a' && c <= 'f' {
+ return true
+ }
+ return false
+}
+
+func (l *lexer) nextInCharProp(c rune) (*token, error) {
+ switch c {
+ case '{':
+ return newToken(tokenKindLBrace, nullChar), nil
+ case '}':
+ return newToken(tokenKindRBrace, nullChar), nil
+ case '=':
+ return newToken(tokenKindEqual, nullChar), nil
+ default:
+ var b strings.Builder
+ fmt.Fprint(&b, string(c))
+ n := 1
+ for {
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ if c == '}' || c == '=' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ fmt.Fprint(&b, string(c))
+ n++
+ }
+ sym := strings.TrimSpace(b.String())
+ if len(sym) == 0 {
+ l.errCause = synErrCharPropInvalidSymbol
+ return nil, ParseErr
+ }
+ return newCharPropSymbolToken(sym), nil
+ }
+}
+
+func (l *lexer) nextInFragment(c rune) (*token, error) {
+ switch c {
+ case '{':
+ return newToken(tokenKindLBrace, nullChar), nil
+ case '}':
+ return newToken(tokenKindRBrace, nullChar), nil
+ default:
+ var b strings.Builder
+ fmt.Fprint(&b, string(c))
+ n := 1
+ for {
+ c, eof, err := l.read()
+ if err != nil {
+ return nil, err
+ }
+ if eof {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ if c == '}' {
+ err := l.restore()
+ if err != nil {
+ return nil, err
+ }
+ break
+ }
+ fmt.Fprint(&b, string(c))
+ n++
+ }
+ sym := strings.TrimSpace(b.String())
+ if len(sym) == 0 {
+ l.errCause = SynErrFragmentInvalidSymbol
+ return nil, ParseErr
+ }
+ return newFragmentSymbolToken(sym), nil
+ }
+}
+
+func (l *lexer) read() (rune, bool, error) {
+ if l.reachedEOF {
+ return l.lastChar, l.reachedEOF, nil
+ }
+ if l.peekChar1 != nullChar || l.peekEOF1 {
+ l.prevChar2 = l.prevChar1
+ l.prevEOF2 = l.prevEOF1
+ l.prevChar1 = l.lastChar
+ l.prevEOF1 = l.reachedEOF
+ l.lastChar = l.peekChar1
+ l.reachedEOF = l.peekEOF1
+ l.peekChar1 = l.peekChar2
+ l.peekEOF1 = l.peekEOF2
+ l.peekChar2 = nullChar
+ l.peekEOF2 = false
+ return l.lastChar, l.reachedEOF, nil
+ }
+ c, _, err := l.src.ReadRune()
+ if err != nil {
+ if err == io.EOF {
+ l.prevChar2 = l.prevChar1
+ l.prevEOF2 = l.prevEOF1
+ l.prevChar1 = l.lastChar
+ l.prevEOF1 = l.reachedEOF
+ l.lastChar = nullChar
+ l.reachedEOF = true
+ return l.lastChar, l.reachedEOF, nil
+ }
+ return nullChar, false, err
+ }
+ l.prevChar2 = l.prevChar1
+ l.prevEOF2 = l.prevEOF1
+ l.prevChar1 = l.lastChar
+ l.prevEOF1 = l.reachedEOF
+ l.lastChar = c
+ l.reachedEOF = false
+ return l.lastChar, l.reachedEOF, nil
+}
+
+func (l *lexer) restore() error {
+ if l.lastChar == nullChar && !l.reachedEOF {
+ return fmt.Errorf("failed to call restore() because the last character is null")
+ }
+ l.peekChar2 = l.peekChar1
+ l.peekEOF2 = l.peekEOF1
+ l.peekChar1 = l.lastChar
+ l.peekEOF1 = l.reachedEOF
+ l.lastChar = l.prevChar1
+ l.reachedEOF = l.prevEOF1
+ l.prevChar1 = l.prevChar2
+ l.prevEOF1 = l.prevEOF2
+ l.prevChar2 = nullChar
+ l.prevEOF2 = false
+ return nil
+}
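+
+// read and restore together implement a two-rune lookahead: read consumes the
+// next rune (or EOF) while remembering the two previously read ones, and
+// restore pushes the current rune back, up to twice in a row. For example, with
+// the hypothetical input "ab":
+//
+//	c, _, _ := l.read() // c == 'a'
+//	c, _, _ = l.read()  // c == 'b'
+//	_ = l.restore()     // the next read returns 'b' again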
diff --git a/grammar/lexical/parser/lexer_test.go b/grammar/lexical/parser/lexer_test.go
new file mode 100644
index 0000000..055466e
--- /dev/null
+++ b/grammar/lexical/parser/lexer_test.go
@@ -0,0 +1,524 @@
+package parser
+
+import (
+ "strings"
+ "testing"
+)
+
+func TestLexer(t *testing.T) {
+ tests := []struct {
+ caption string
+ src string
+ tokens []*token
+ err error
+ }{
+ {
+ caption: "lexer can recognize ordinaly characters",
+ src: "123abcいろは",
+ tokens: []*token{
+ newToken(tokenKindChar, '1'),
+ newToken(tokenKindChar, '2'),
+ newToken(tokenKindChar, '3'),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, 'b'),
+ newToken(tokenKindChar, 'c'),
+ newToken(tokenKindChar, 'い'),
+ newToken(tokenKindChar, 'ろ'),
+ newToken(tokenKindChar, 'は'),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters in default mode",
+ src: ".*+?|()[\\u",
+ tokens: []*token{
+ newToken(tokenKindAnyChar, nullChar),
+ newToken(tokenKindRepeat, nullChar),
+ newToken(tokenKindRepeatOneOrMore, nullChar),
+ newToken(tokenKindOption, nullChar),
+ newToken(tokenKindAlt, nullChar),
+ newToken(tokenKindGroupOpen, nullChar),
+ newToken(tokenKindGroupClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the escape sequences in default mode",
+ src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
+ tokens: []*token{
+ newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
+ newToken(tokenKindChar, '*'),
+ newToken(tokenKindChar, '+'),
+ newToken(tokenKindChar, '?'),
+ newToken(tokenKindChar, '|'),
+ newToken(tokenKindChar, '('),
+ newToken(tokenKindChar, ')'),
+ newToken(tokenKindChar, '['),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "], {, and } are treated as an ordinary character in default mode",
+ src: "]{}",
+ tokens: []*token{
+ newToken(tokenKindChar, ']'),
+ newToken(tokenKindChar, '{'),
+ newToken(tokenKindChar, '}'),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters in bracket expression mode",
+ src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("09AF"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("09abcf"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the escape sequences in bracket expression mode",
+ src: "[\\^a\\-z]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "in a bracket expression, the special characters are also handled as normal characters",
+ src: "[\\\\.*+?|()[",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '\\'),
+ newToken(tokenKindChar, '.'),
+ newToken(tokenKindChar, '*'),
+ newToken(tokenKindChar, '+'),
+ newToken(tokenKindChar, '?'),
+ newToken(tokenKindChar, '|'),
+ newToken(tokenKindChar, '('),
+ newToken(tokenKindChar, ')'),
+ newToken(tokenKindChar, '['),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
+ // [...-...][...-][-...][-]
+ // ~~~~~~~ ~ ~ ~
+ // ^ ^ ^ ^
+ // | | | `-- Ordinary Character (b)
+ // | | `-- Ordinary Character (b)
+ // | `-- Ordinary Character (b)
+ // `-- Character Range (a)
+ //
+ // a. *-* is handled as a character-range expression.
+ // b. *-, -*, or - are handled as ordinary characters.
+ src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, 'a'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, 'z'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindCharRange, nullChar),
+ newToken(tokenKindChar, '-'),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
+ // [^...^...][^]
+ // ~~ ~ ~~
+ // ^ ^ ^^
+ // | | |`-- Ordinary Character (c)
+ // | | `-- Bracket Expression
+ // | `-- Ordinary Character (b)
+ // `-- Inverse Bracket Expression (a)
+ //
+ // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
+ // b. Caret symbols that appear as the second or a subsequent symbol are handled as ordinary characters.
+ // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
+ src: "[^^][^]",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindChar, '^'),
+ newToken(tokenKindBExpClose, nullChar),
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer raises an error when an invalid escape sequence appears",
+ src: "\\@",
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+ src: "\\",
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "lexer raises an error when an invalid escape sequence appears",
+ src: "[\\@",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
+ src: "[\\",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrIncompletedEscSeq,
+ },
+ {
+ caption: "lexer can recognize the special characters and code points in code point expression mode",
+ src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("4567"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("89abcd"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("efAB"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("CDEF01"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a one digit hex string isn't a valid code point",
+ src: "\\u{0",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a two digits hex string isn't a valid code point",
+ src: "\\u{01",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a three digits hex string isn't a valid code point",
+ src: "\\u{012",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a four digits hex string is a valid code point",
+ src: "\\u{0123}",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("0123"),
+ newToken(tokenKindRBrace, nullChar),
+ },
+ },
+ {
+ caption: "a five digits hex string isn't a valid code point",
+ src: "\\u{01234",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a six digits hex string is a valid code point",
+ src: "\\u{012345}",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCodePointToken("012345"),
+ newToken(tokenKindRBrace, nullChar),
+ },
+ },
+ {
+ caption: "a seven digits hex string isn't a valid code point",
+ src: "\\u{0123456",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a code point must be hex digits",
+ src: "\\u{g",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "a code point must be hex digits",
+ src: "\\u{G",
+ tokens: []*token{
+ newToken(tokenKindCodePointLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ },
+ err: synErrInvalidCodePoint,
+ },
+ {
+ caption: "lexer can recognize the special characters and symbols in character property expression mode",
+ src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
+ tokens: []*token{
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindBExpOpen, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindCharPropLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newCharPropSymbolToken("General_Category"),
+ newToken(tokenKindEqual, nullChar),
+ newCharPropSymbolToken("Letter"),
+ newToken(tokenKindRBrace, nullChar),
+ newToken(tokenKindBExpClose, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "lexer can recognize the special characters and symbols in fragment expression mode",
+ src: "\\f{integer}",
+ tokens: []*token{
+ newToken(tokenKindFragmentLeader, nullChar),
+ newToken(tokenKindLBrace, nullChar),
+ newFragmentSymbolToken("integer"),
+ newToken(tokenKindRBrace, nullChar),
+
+ newToken(tokenKindEOF, nullChar),
+ },
+ },
+ {
+ caption: "a fragment expression is not supported in a bracket expression",
+ src: "[\\f",
+ tokens: []*token{
+ newToken(tokenKindBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ {
+ caption: "a fragment expression is not supported in an inverse bracket expression",
+ src: "[^\\f",
+ tokens: []*token{
+ newToken(tokenKindInverseBExpOpen, nullChar),
+ },
+ err: synErrInvalidEscSeq,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.caption, func(t *testing.T) {
+ lex := newLexer(strings.NewReader(tt.src))
+ var err error
+ var tok *token
+ i := 0
+ for {
+ tok, err = lex.next()
+ if err != nil {
+ break
+ }
+ if i >= len(tt.tokens) {
+ break
+ }
+ eTok := tt.tokens[i]
+ i++
+ testToken(t, tok, eTok)
+
+ if tok.kind == tokenKindEOF {
+ break
+ }
+ }
+ if tt.err != nil {
+ if err != ParseErr {
+ t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
+ }
+ detail, cause := lex.error()
+ if cause != tt.err {
+ t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
+ }
+ } else {
+ if err != nil {
+ t.Fatalf("unexpected error: %v", err)
+ }
+ }
+ if i < len(tt.tokens) {
+ t.Fatalf("expecte more tokens")
+ }
+ })
+ }
+}
+
+func testToken(t *testing.T, a, e *token) {
+ t.Helper()
+ if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
+ t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
+ }
+}
diff --git a/grammar/lexical/parser/parser.go b/grammar/lexical/parser/parser.go
new file mode 100644
index 0000000..89362b8
--- /dev/null
+++ b/grammar/lexical/parser/parser.go
@@ -0,0 +1,531 @@
+package parser
+
+import (
+ "bytes"
+ "fmt"
+ "io"
+ "strconv"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/ucd"
+)
+
+type PatternEntry struct {
+ ID spec.LexModeKindID
+ Pattern []byte
+}
+
+type parser struct {
+ kind spec.LexKindName
+ lex *lexer
+ peekedTok *token
+ lastTok *token
+
+ // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that
+ // appear in property expressions.
+ //
+	// By default, the contributory properties are not exposed, so users cannot use them; the parser follows
+	// [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid.
+	//
+	// isContributoryPropertyExposed is set to true when the parser is generated recursively. The parser needs to
+	// interpret derived properties internally because derived properties are composed of other properties that
+	// may include the contributory ones.
+ //
+ // [UAX #44 5.13 Property APIs] says:
+ // > The following subtypes of Unicode character properties should generally not be exposed in APIs,
+ // > except in limited circumstances. They may not be useful, particularly in public API collections,
+ // > and may instead prove misleading to the users of such API collections.
+ // > * Contributory properties are not recommended for public APIs.
+ // > ...
+ // https://unicode.org/reports/tr44/#Property_APIs
+ isContributoryPropertyExposed bool
+
+ errCause error
+ errDetail string
+}
+
+func NewParser(kind spec.LexKindName, src io.Reader) *parser {
+ return &parser{
+ kind: kind,
+ lex: newLexer(src),
+ isContributoryPropertyExposed: false,
+ }
+}
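+
+// A minimal usage sketch (illustrative only; the kind name "word" and the
+// pattern are placeholders, not part of this package):
+//
+//	p := NewParser(spec.LexKindName("word"), strings.NewReader(`[A-Za-z_][0-9A-Za-z_]*`))
+//	root, err := p.Parse()
+//	if err != nil {
+//		detail, cause := p.Error() // cause is one of the synErr* values
+//		fmt.Println(cause, detail)
+//	}
+//	_ = root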
+
+func (p *parser) exposeContributoryProperty() {
+ p.isContributoryPropertyExposed = true
+}
+
+func (p *parser) Error() (string, error) {
+ return p.errDetail, p.errCause
+}
+
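+// Parse builds a CPTree for the pattern. The descent methods below signal
+// syntax errors by calling raiseParseError, which records the cause and
+// detail on the parser and panics with ParseErr; the deferred recover in
+// Parse converts that panic back into an ordinary error return.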
+func (p *parser) Parse() (root CPTree, retErr error) {
+ defer func() {
+ err := recover()
+ if err != nil {
+ var ok bool
+ retErr, ok = err.(error)
+ if !ok {
+ panic(err)
+ }
+ return
+ }
+ }()
+
+ return newRootNode(p.kind, p.parseRegexp()), nil
+}
+
+func (p *parser) parseRegexp() CPTree {
+ alt := p.parseAlt()
+ if alt == nil {
+ if p.consume(tokenKindGroupClose) {
+ p.raiseParseError(synErrGroupNoInitiator, "")
+ }
+ p.raiseParseError(synErrNullPattern, "")
+ }
+ if p.consume(tokenKindGroupClose) {
+ p.raiseParseError(synErrGroupNoInitiator, "")
+ }
+ p.expect(tokenKindEOF)
+ return alt
+}
+
+func (p *parser) parseAlt() CPTree {
+ left := p.parseConcat()
+ if left == nil {
+ if p.consume(tokenKindAlt) {
+ p.raiseParseError(synErrAltLackOfOperand, "")
+ }
+ return nil
+ }
+ for {
+ if !p.consume(tokenKindAlt) {
+ break
+ }
+ right := p.parseConcat()
+ if right == nil {
+ p.raiseParseError(synErrAltLackOfOperand, "")
+ }
+ left = newAltNode(left, right)
+ }
+ return left
+}
+
+func (p *parser) parseConcat() CPTree {
+ left := p.parseRepeat()
+ for {
+ right := p.parseRepeat()
+ if right == nil {
+ break
+ }
+ left = newConcatNode(left, right)
+ }
+ return left
+}
+
+func (p *parser) parseRepeat() CPTree {
+ group := p.parseGroup()
+ if group == nil {
+ if p.consume(tokenKindRepeat) {
+ p.raiseParseError(synErrRepNoTarget, "* needs an operand")
+ }
+ if p.consume(tokenKindRepeatOneOrMore) {
+ p.raiseParseError(synErrRepNoTarget, "+ needs an operand")
+ }
+ if p.consume(tokenKindOption) {
+ p.raiseParseError(synErrRepNoTarget, "? needs an operand")
+ }
+ return nil
+ }
+ if p.consume(tokenKindRepeat) {
+ return newRepeatNode(group)
+ }
+ if p.consume(tokenKindRepeatOneOrMore) {
+ return newRepeatOneOrMoreNode(group)
+ }
+ if p.consume(tokenKindOption) {
+ return newOptionNode(group)
+ }
+ return group
+}
+
+func (p *parser) parseGroup() CPTree {
+ if p.consume(tokenKindGroupOpen) {
+ alt := p.parseAlt()
+ if alt == nil {
+ if p.consume(tokenKindEOF) {
+ p.raiseParseError(synErrGroupUnclosed, "")
+ }
+ p.raiseParseError(synErrGroupNoElem, "")
+ }
+ if p.consume(tokenKindEOF) {
+ p.raiseParseError(synErrGroupUnclosed, "")
+ }
+ if !p.consume(tokenKindGroupClose) {
+ p.raiseParseError(synErrGroupInvalidForm, "")
+ }
+ return alt
+ }
+ return p.parseSingleChar()
+}
+
+func (p *parser) parseSingleChar() CPTree {
+ if p.consume(tokenKindAnyChar) {
+ return genAnyCharAST()
+ }
+ if p.consume(tokenKindBExpOpen) {
+ left := p.parseBExpElem()
+ if left == nil {
+ if p.consume(tokenKindEOF) {
+ p.raiseParseError(synErrBExpUnclosed, "")
+ }
+ p.raiseParseError(synErrBExpNoElem, "")
+ }
+ for {
+ right := p.parseBExpElem()
+ if right == nil {
+ break
+ }
+ left = newAltNode(left, right)
+ }
+ if p.consume(tokenKindEOF) {
+ p.raiseParseError(synErrBExpUnclosed, "")
+ }
+ p.expect(tokenKindBExpClose)
+ return left
+ }
+ if p.consume(tokenKindInverseBExpOpen) {
+ elem := p.parseBExpElem()
+ if elem == nil {
+ if p.consume(tokenKindEOF) {
+ p.raiseParseError(synErrBExpUnclosed, "")
+ }
+ p.raiseParseError(synErrBExpNoElem, "")
+ }
+ inverse := exclude(elem, genAnyCharAST())
+ if inverse == nil {
+ p.raiseParseError(synErrUnmatchablePattern, "")
+ }
+ for {
+ elem := p.parseBExpElem()
+ if elem == nil {
+ break
+ }
+ inverse = exclude(elem, inverse)
+ if inverse == nil {
+ p.raiseParseError(synErrUnmatchablePattern, "")
+ }
+ }
+ if p.consume(tokenKindEOF) {
+ p.raiseParseError(synErrBExpUnclosed, "")
+ }
+ p.expect(tokenKindBExpClose)
+ return inverse
+ }
+ if p.consume(tokenKindCodePointLeader) {
+ return p.parseCodePoint()
+ }
+ if p.consume(tokenKindCharPropLeader) {
+ return p.parseCharProp()
+ }
+ if p.consume(tokenKindFragmentLeader) {
+ return p.parseFragment()
+ }
+ c := p.parseNormalChar()
+ if c == nil {
+ if p.consume(tokenKindBExpClose) {
+ p.raiseParseError(synErrBExpInvalidForm, "")
+ }
+ return nil
+ }
+ return c
+}
+
+func (p *parser) parseBExpElem() CPTree {
+ var left CPTree
+ switch {
+ case p.consume(tokenKindCodePointLeader):
+ left = p.parseCodePoint()
+ case p.consume(tokenKindCharPropLeader):
+ left = p.parseCharProp()
+ if p.consume(tokenKindCharRange) {
+ p.raiseParseError(synErrRangePropIsUnavailable, "")
+ }
+ default:
+ left = p.parseNormalChar()
+ }
+ if left == nil {
+ return nil
+ }
+ if !p.consume(tokenKindCharRange) {
+ return left
+ }
+ var right CPTree
+ switch {
+ case p.consume(tokenKindCodePointLeader):
+ right = p.parseCodePoint()
+ case p.consume(tokenKindCharPropLeader):
+ p.raiseParseError(synErrRangePropIsUnavailable, "")
+ default:
+ right = p.parseNormalChar()
+ }
+ if right == nil {
+ p.raiseParseError(synErrRangeInvalidForm, "")
+ }
+ from, _, _ := left.Range()
+ _, to, _ := right.Range()
+ if !isValidOrder(from, to) {
+ p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("%X..%X", from, to))
+ }
+ return newRangeSymbolNode(from, to)
+}
+
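+// parseCodePoint parses the {hex} body that follows a \u leader. For example,
+// \u{3042} yields a symbol node for U+3042, while \u{110000} exceeds U+10FFFF
+// and is rejected with synErrCPExpOutOfRange.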
+func (p *parser) parseCodePoint() CPTree {
+ if !p.consume(tokenKindLBrace) {
+ p.raiseParseError(synErrCPExpInvalidForm, "")
+ }
+ if !p.consume(tokenKindCodePoint) {
+ p.raiseParseError(synErrCPExpInvalidForm, "")
+ }
+
+ n, err := strconv.ParseInt(p.lastTok.codePoint, 16, 64)
+ if err != nil {
+		panic(fmt.Errorf("failed to decode a code point (%v) into an int: %v", p.lastTok.codePoint, err))
+ }
+ if n < 0x0000 || n > 0x10FFFF {
+ p.raiseParseError(synErrCPExpOutOfRange, "")
+ }
+
+ sym := newSymbolNode(rune(n))
+
+ if !p.consume(tokenKindRBrace) {
+ p.raiseParseError(synErrCPExpInvalidForm, "")
+ }
+
+ return sym
+}
+
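+// parseCharProp parses the {name=value} body that follows a \p leader. The
+// value-only form is also accepted: \p{Letter} leaves propName empty and sets
+// propVal to "Letter", whereas \p{General_Category=Letter} supplies both.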
+func (p *parser) parseCharProp() CPTree {
+ if !p.consume(tokenKindLBrace) {
+ p.raiseParseError(synErrCharPropExpInvalidForm, "")
+ }
+ var sym1, sym2 string
+ if !p.consume(tokenKindCharPropSymbol) {
+ p.raiseParseError(synErrCharPropExpInvalidForm, "")
+ }
+ sym1 = p.lastTok.propSymbol
+ if p.consume(tokenKindEqual) {
+ if !p.consume(tokenKindCharPropSymbol) {
+ p.raiseParseError(synErrCharPropExpInvalidForm, "")
+ }
+ sym2 = p.lastTok.propSymbol
+ }
+
+ var alt CPTree
+ var propName, propVal string
+ if sym2 != "" {
+ propName = sym1
+ propVal = sym2
+ } else {
+ propName = ""
+ propVal = sym1
+ }
+ if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) {
+ p.raiseParseError(synErrCharPropUnsupported, propName)
+ }
+ pat, err := ucd.NormalizeCharacterProperty(propName, propVal)
+ if err != nil {
+ p.raiseParseError(synErrCharPropUnsupported, err.Error())
+ }
+ if pat != "" {
+ p := NewParser(p.kind, bytes.NewReader([]byte(pat)))
+ p.exposeContributoryProperty()
+ ast, err := p.Parse()
+ if err != nil {
+ panic(err)
+ }
+ alt = ast
+ } else {
+ cpRanges, inverse, err := ucd.FindCodePointRanges(propName, propVal)
+ if err != nil {
+ p.raiseParseError(synErrCharPropUnsupported, err.Error())
+ }
+ if inverse {
+ r := cpRanges[0]
+ alt = exclude(newRangeSymbolNode(r.From, r.To), genAnyCharAST())
+ if alt == nil {
+ p.raiseParseError(synErrUnmatchablePattern, "")
+ }
+ for _, r := range cpRanges[1:] {
+ alt = exclude(newRangeSymbolNode(r.From, r.To), alt)
+ if alt == nil {
+ p.raiseParseError(synErrUnmatchablePattern, "")
+ }
+ }
+ } else {
+ for _, r := range cpRanges {
+ alt = genAltNode(
+ alt,
+ newRangeSymbolNode(r.From, r.To),
+ )
+ }
+ }
+ }
+
+ if !p.consume(tokenKindRBrace) {
+ p.raiseParseError(synErrCharPropExpInvalidForm, "")
+ }
+
+ return alt
+}
+
+func (p *parser) parseFragment() CPTree {
+ if !p.consume(tokenKindLBrace) {
+ p.raiseParseError(synErrFragmentExpInvalidForm, "")
+ }
+ if !p.consume(tokenKindFragmentSymbol) {
+ p.raiseParseError(synErrFragmentExpInvalidForm, "")
+ }
+ sym := p.lastTok.fragmentSymbol
+
+ if !p.consume(tokenKindRBrace) {
+ p.raiseParseError(synErrFragmentExpInvalidForm, "")
+ }
+
+ return newFragmentNode(spec.LexKindName(sym), nil)
+}
+
+func (p *parser) parseNormalChar() CPTree {
+ if !p.consume(tokenKindChar) {
+ return nil
+ }
+ return newSymbolNode(p.lastTok.char)
+}
+
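+// exclude returns the code points of base minus those of symbol, as a tree of
+// ranges. For example, excluding 'b' from the range 'a'..'z' yields the two
+// ranges 'a'..'a' and 'c'..'z'; when symbol covers base entirely, exclude
+// returns nil, which callers report as an unmatchable pattern.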
+func exclude(symbol, base CPTree) CPTree {
+ if left, right, ok := symbol.Alternatives(); ok {
+ return exclude(right, exclude(left, base))
+ }
+
+ if left, right, ok := base.Alternatives(); ok {
+ return genAltNode(
+ exclude(symbol, left),
+ exclude(symbol, right),
+ )
+ }
+
+ if bFrom, bTo, ok := base.Range(); ok {
+ sFrom, sTo, ok := symbol.Range()
+ if !ok {
+ panic(fmt.Errorf("invalid symbol tree: %T", symbol))
+ }
+
+ switch {
+ case sFrom > bFrom && sTo < bTo:
+ return genAltNode(
+ newRangeSymbolNode(bFrom, sFrom-1),
+ newRangeSymbolNode(sTo+1, bTo),
+ )
+ case sFrom <= bFrom && sTo >= bFrom && sTo < bTo:
+ return newRangeSymbolNode(sTo+1, bTo)
+ case sFrom > bFrom && sFrom <= bTo && sTo >= bTo:
+ return newRangeSymbolNode(bFrom, sFrom-1)
+ case sFrom <= bFrom && sTo >= bTo:
+ return nil
+ default:
+ return base
+ }
+ }
+
+ panic(fmt.Errorf("invalid base tree: %T", base))
+}
+
+func genAnyCharAST() CPTree {
+ return newRangeSymbolNode(0x0, 0x10FFFF)
+}
+
+func isValidOrder(from, to rune) bool {
+ return from <= to
+}
+
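+// genConcatNode and genAltNode skip nil operands and fold the remaining trees
+// left-associatively; for example, genAltNode(nil, a, b, c) builds
+// alt(alt(a, b), c).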
+func genConcatNode(cs ...CPTree) CPTree {
+ nonNilNodes := []CPTree{}
+ for _, c := range cs {
+ if c == nil {
+ continue
+ }
+ nonNilNodes = append(nonNilNodes, c)
+ }
+ if len(nonNilNodes) <= 0 {
+ return nil
+ }
+ if len(nonNilNodes) == 1 {
+ return nonNilNodes[0]
+ }
+ concat := newConcatNode(nonNilNodes[0], nonNilNodes[1])
+ for _, c := range nonNilNodes[2:] {
+ concat = newConcatNode(concat, c)
+ }
+ return concat
+}
+
+func genAltNode(cs ...CPTree) CPTree {
+ nonNilNodes := []CPTree{}
+ for _, c := range cs {
+ if c == nil {
+ continue
+ }
+ nonNilNodes = append(nonNilNodes, c)
+ }
+ if len(nonNilNodes) <= 0 {
+ return nil
+ }
+ if len(nonNilNodes) == 1 {
+ return nonNilNodes[0]
+ }
+ alt := newAltNode(nonNilNodes[0], nonNilNodes[1])
+ for _, c := range nonNilNodes[2:] {
+ alt = newAltNode(alt, c)
+ }
+ return alt
+}
+
+func (p *parser) expect(expected tokenKind) {
+ if !p.consume(expected) {
+ tok := p.peekedTok
+ p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("expected: %v, actual: %v", expected, tok.kind))
+ }
+}
+
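+// consume implements single-token lookahead: a token that does not match
+// expected is stashed in peekedTok and re-read by the next call, while a
+// matching token stays available to the caller through lastTok.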
+func (p *parser) consume(expected tokenKind) bool {
+ var tok *token
+ var err error
+ if p.peekedTok != nil {
+ tok = p.peekedTok
+ p.peekedTok = nil
+ } else {
+ tok, err = p.lex.next()
+ if err != nil {
+ if err == ParseErr {
+ detail, cause := p.lex.error()
+ p.raiseParseError(cause, detail)
+ }
+ panic(err)
+ }
+ }
+ p.lastTok = tok
+ if tok.kind == expected {
+ return true
+ }
+ p.peekedTok = tok
+ p.lastTok = nil
+
+ return false
+}
+
+func (p *parser) raiseParseError(err error, detail string) {
+ p.errCause = err
+ p.errDetail = detail
+ panic(ParseErr)
+}
diff --git a/grammar/lexical/parser/parser_test.go b/grammar/lexical/parser/parser_test.go
new file mode 100644
index 0000000..d6cc4a8
--- /dev/null
+++ b/grammar/lexical/parser/parser_test.go
@@ -0,0 +1,1389 @@
+package parser
+
+import (
+ "fmt"
+ "reflect"
+ "strings"
+ "testing"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/ucd"
+)
+
+func TestParse(t *testing.T) {
+ tests := []struct {
+ pattern string
+ fragments map[spec.LexKindName]string
+ ast CPTree
+ syntaxError error
+
+		// When an AST is large, as with patterns containing a character property expression, this test only
+		// checks that the pattern is parsable. The validity of such an AST is verified by checking that the
+		// pattern can be matched correctly using the driver.
+ skipTestAST bool
+ }{
+ {
+ pattern: "a",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "abc",
+ ast: genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ },
+ {
+ pattern: "a?",
+ ast: newOptionNode(
+ newSymbolNode('a'),
+ ),
+ },
+ {
+ pattern: "[abc]?",
+ ast: newOptionNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{3042}?",
+ ast: newOptionNode(
+ newSymbolNode('\u3042'),
+ ),
+ },
+ {
+ pattern: "\\p{Letter}?",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\f{a2c}?",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newOptionNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(a)?",
+ ast: newOptionNode(
+ newSymbolNode('a'),
+ ),
+ },
+ {
+ pattern: "((a?)?)?",
+ ast: newOptionNode(
+ newOptionNode(
+ newOptionNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(abc)?",
+ ast: newOptionNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "(a|b)?",
+ ast: newOptionNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ ),
+ },
+ {
+ pattern: "?",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "(?)",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a|?",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "?|b",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a??",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a*",
+ ast: newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ },
+ {
+ pattern: "[abc]*",
+ ast: newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{3042}*",
+ ast: newRepeatNode(
+ newSymbolNode('\u3042'),
+ ),
+ },
+ {
+ pattern: "\\p{Letter}*",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\f{a2c}*",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newRepeatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "((a*)*)*",
+ ast: newRepeatNode(
+ newRepeatNode(
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(abc)*",
+ ast: newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "(a|b)*",
+ ast: newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ ),
+ },
+ {
+ pattern: "*",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "(*)",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a|*",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "*|b",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a**",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a+",
+ ast: genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ },
+ {
+ pattern: "[abc]+",
+ ast: genConcatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{3042}+",
+ ast: genConcatNode(
+ newSymbolNode('\u3042'),
+ newRepeatNode(
+ newSymbolNode('\u3042'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\p{Letter}+",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\f{a2c}+",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: genConcatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ newRepeatNode(
+ newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "((a+)+)+",
+ ast: genConcatNode(
+ genConcatNode(
+ genConcatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newRepeatNode(
+ newSymbolNode('a'),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(abc)+",
+ ast: genConcatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ newRepeatNode(
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "(a|b)+",
+ ast: genConcatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ newRepeatNode(
+ genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ ),
+ ),
+ ),
+ },
+ {
+ pattern: "+",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "(+)",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a|+",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "+|b",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: "a++",
+ syntaxError: synErrRepNoTarget,
+ },
+ {
+ pattern: ".",
+ ast: newRangeSymbolNode(0x00, 0x10FFFF),
+ },
+ {
+ pattern: "[a]",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "[abc]",
+ ast: genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ },
+ {
+ pattern: "[a-z]",
+ ast: newRangeSymbolNode('a', 'z'),
+ },
+ {
+ pattern: "[A-Za-z]",
+ ast: genAltNode(
+ newRangeSymbolNode('A', 'Z'),
+ newRangeSymbolNode('a', 'z'),
+ ),
+ },
+ {
+ pattern: "[\\u{004E}]",
+ ast: newSymbolNode('N'),
+ },
+ {
+ pattern: "[\\u{0061}-\\u{007A}]",
+ ast: newRangeSymbolNode('a', 'z'),
+ },
+ {
+ pattern: "[\\p{Lu}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[a-\\p{Lu}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[\\p{Lu}-z]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[\\p{Lu}-\\p{Ll}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[z-a]",
+ syntaxError: synErrRangeInvalidOrder,
+ },
+ {
+ pattern: "a[]",
+ syntaxError: synErrBExpNoElem,
+ },
+ {
+ pattern: "[]a",
+ syntaxError: synErrBExpNoElem,
+ },
+ {
+ pattern: "[]",
+ syntaxError: synErrBExpNoElem,
+ },
+ {
+ pattern: "[^\\u{004E}]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '\u004E'-1),
+ newRangeSymbolNode('\u004E'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^\\u{0061}-\\u{007A}]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '\u0061'-1),
+ newRangeSymbolNode('\u007A'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^\\p{Lu}]",
+ skipTestAST: true,
+ },
+ {
+ pattern: "[^a-\\p{Lu}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\p{Lu}-z]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\p{Lu}-\\p{Ll}]",
+ syntaxError: synErrRangePropIsUnavailable,
+ },
+ {
+ pattern: "[^\\u{0000}-\\u{10FFFF}]",
+ syntaxError: synErrUnmatchablePattern,
+ },
+ {
+ pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]",
+ syntaxError: synErrUnmatchablePattern,
+ },
+ {
+ pattern: "[^]",
+ ast: newSymbolNode('^'),
+ },
+ {
+ pattern: "[",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[^",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([^",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[^a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([^a",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[^a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([^a-",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "]",
+ ast: newSymbolNode(']'),
+ },
+ {
+ pattern: "(]",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "a]",
+ ast: genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode(']'),
+ ),
+ },
+ {
+ pattern: "(a]",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "([)",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "([a)",
+ syntaxError: synErrBExpUnclosed,
+ },
+ {
+ pattern: "[a-]",
+ ast: genAltNode(
+ newSymbolNode('a'),
+ newSymbolNode('-'),
+ ),
+ },
+ {
+ pattern: "[^a-]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 0x2C),
+ newRangeSymbolNode(0x2E, 0x60),
+ newRangeSymbolNode(0x62, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[-z]",
+ ast: genAltNode(
+ newSymbolNode('-'),
+ newSymbolNode('z'),
+ ),
+ },
+ {
+ pattern: "[^-z]",
+ ast: newAltNode(
+ newRangeSymbolNode(0x00, 0x2C),
+ newAltNode(
+ newRangeSymbolNode(0x2E, 0x79),
+ newRangeSymbolNode(0x7B, 0x10FFFF),
+ ),
+ ),
+ },
+ {
+ pattern: "[-]",
+ ast: newSymbolNode('-'),
+ },
+ {
+ pattern: "[^-]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 0x2C),
+ newRangeSymbolNode(0x2E, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^01]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '0'-1),
+ newRangeSymbolNode('1'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^10]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, '0'-1),
+ newRangeSymbolNode('1'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^a-z]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 'a'-1),
+ newRangeSymbolNode('z'+1, 0x10FFFF),
+ ),
+ },
+ {
+ pattern: "[^az]",
+ ast: genAltNode(
+ newRangeSymbolNode(0x00, 'a'-1),
+ genAltNode(
+ newRangeSymbolNode('a'+1, 'z'-1),
+ newRangeSymbolNode('z'+1, 0x10FFFF),
+ ),
+ ),
+ },
+ {
+ pattern: "\\u{006E}",
+ ast: newSymbolNode('\u006E'),
+ },
+ {
+ pattern: "\\u{03BD}",
+ ast: newSymbolNode('\u03BD'),
+ },
+ {
+ pattern: "\\u{306B}",
+ ast: newSymbolNode('\u306B'),
+ },
+ {
+ pattern: "\\u{01F638}",
+ ast: newSymbolNode('\U0001F638'),
+ },
+ {
+ pattern: "\\u{0000}",
+ ast: newSymbolNode('\u0000'),
+ },
+ {
+ pattern: "\\u{10FFFF}",
+ ast: newSymbolNode('\U0010FFFF'),
+ },
+ {
+ pattern: "\\u{110000}",
+ syntaxError: synErrCPExpOutOfRange,
+ },
+ {
+ pattern: "\\u",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\u{",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\u{03BD",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\u{}",
+ syntaxError: synErrCPExpInvalidForm,
+ },
+ {
+ pattern: "\\p{Letter}",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p{General_Category=Letter}",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p{ Letter }",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p{ General_Category = Letter }",
+ skipTestAST: true,
+ },
+ {
+ pattern: "\\p",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{Letter",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{General_Category=}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{General_Category= }",
+ syntaxError: synErrCharPropInvalidSymbol,
+ },
+ {
+ pattern: "\\p{=Letter}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{ =Letter}",
+ syntaxError: synErrCharPropInvalidSymbol,
+ },
+ {
+ pattern: "\\p{=}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\p{}",
+ syntaxError: synErrCharPropExpInvalidForm,
+ },
+ {
+ pattern: "\\f{a2c}",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\f{ a2c }",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ ast: newFragmentNode("a2c",
+ genConcatNode(
+ newSymbolNode('a'),
+ newSymbolNode('b'),
+ newSymbolNode('c'),
+ ),
+ ),
+ },
+ {
+ pattern: "\\f",
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "\\f{",
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "\\f{a2c",
+ fragments: map[spec.LexKindName]string{
+ "a2c": "abc",
+ },
+ syntaxError: synErrFragmentExpInvalidForm,
+ },
+ {
+ pattern: "(a)",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "(((a)))",
+ ast: newSymbolNode('a'),
+ },
+ {
+ pattern: "a()",
+ syntaxError: synErrGroupNoElem,
+ },
+ {
+ pattern: "()a",
+ syntaxError: synErrGroupNoElem,
+ },
+ {
+ pattern: "()",
+ syntaxError: synErrGroupNoElem,
+ },
+ {
+ pattern: "(",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "a(",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "(a",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "((",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: "((a)",
+ syntaxError: synErrGroupUnclosed,
+ },
+ {
+ pattern: ")",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "a)",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: ")a",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "))",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "(a))",
+ syntaxError: synErrGroupNoInitiator,
+ },
+ {
+ pattern: "Mulder|Scully",
+ ast: genAltNode(
+ genConcatNode(
+ newSymbolNode('M'),
+ newSymbolNode('u'),
+ newSymbolNode('l'),
+ newSymbolNode('d'),
+ newSymbolNode('e'),
+ newSymbolNode('r'),
+ ),
+ genConcatNode(
+ newSymbolNode('S'),
+ newSymbolNode('c'),
+ newSymbolNode('u'),
+ newSymbolNode('l'),
+ newSymbolNode('l'),
+ newSymbolNode('y'),
+ ),
+ ),
+ },
+ {
+ pattern: "Langly|Frohike|Byers",
+ ast: genAltNode(
+ genConcatNode(
+ newSymbolNode('L'),
+ newSymbolNode('a'),
+ newSymbolNode('n'),
+ newSymbolNode('g'),
+ newSymbolNode('l'),
+ newSymbolNode('y'),
+ ),
+ genConcatNode(
+ newSymbolNode('F'),
+ newSymbolNode('r'),
+ newSymbolNode('o'),
+ newSymbolNode('h'),
+ newSymbolNode('i'),
+ newSymbolNode('k'),
+ newSymbolNode('e'),
+ ),
+ genConcatNode(
+ newSymbolNode('B'),
+ newSymbolNode('y'),
+ newSymbolNode('e'),
+ newSymbolNode('r'),
+ newSymbolNode('s'),
+ ),
+ ),
+ },
+ {
+ pattern: "|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "||",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Mulder|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "|Scully",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Langly|Frohike|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Langly||Byers",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "|Frohike|Byers",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "|Frohike|",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Fox(|)Mulder",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "(Fox|)Mulder",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ {
+ pattern: "Fox(|Mulder)",
+ syntaxError: synErrAltLackOfOperand,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) {
+ fragmentTrees := map[spec.LexKindName]CPTree{}
+ for kind, pattern := range tt.fragments {
+ p := NewParser(kind, strings.NewReader(pattern))
+ root, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ fragmentTrees[kind] = root
+ }
+ err := CompleteFragments(fragmentTrees)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern))
+ root, err := p.Parse()
+ if tt.syntaxError != nil {
+ // printCPTree(os.Stdout, root, "", "")
+ if err != ParseErr {
+ t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
+ }
+ _, synErr := p.Error()
+ if synErr != tt.syntaxError {
+ t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr)
+ }
+ if root != nil {
+ t.Fatalf("tree must be nil")
+ }
+ } else {
+ if err != nil {
+ detail, cause := p.Error()
+ t.Fatalf("%v: %v: %v", err, cause, detail)
+ }
+ if root == nil {
+ t.Fatal("tree must be non-nil")
+ }
+
+ complete, err := ApplyFragments(root, fragmentTrees)
+ if err != nil {
+ t.Fatal(err)
+ }
+ if !complete {
+ t.Fatalf("incomplete fragments")
+ }
+
+ // printCPTree(os.Stdout, root, "", "")
+ if !tt.skipTestAST {
+ r := root.(*rootNode)
+ testAST(t, tt.ast, r.tree)
+ }
+ }
+ })
+ }
+}
+
+func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) {
+ for _, cProp := range ucd.ContributoryProperties() {
+ t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) {
+ p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp)))
+ root, err := p.Parse()
+ if err == nil {
+ t.Fatalf("expected syntax error: got: nil")
+ }
+ _, synErr := p.Error()
+ if synErr != synErrCharPropUnsupported {
+ t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr)
+ }
+ if root != nil {
+ t.Fatalf("tree is not nil")
+ }
+ })
+ }
+}
+
+func TestExclude(t *testing.T) {
+ for _, test := range []struct {
+ caption string
+ target CPTree
+ base CPTree
+ result CPTree
+ }{
+ // t.From > b.From && t.To < b.To
+
+ // |t.From - b.From| = 1
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+ +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1",
+ target: newSymbolNode('1'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newAltNode(
+ newSymbolNode('0'),
+ newSymbolNode('2'),
+ ),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+--+--+
+ // Result (b - t): +--+--+ +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1",
+ target: newSymbolNode('2'),
+ base: newRangeSymbolNode('0', '4'),
+ result: newAltNode(
+ newRangeSymbolNode('0', '1'),
+ newRangeSymbolNode('3', '4'),
+ ),
+ },
+
+ // t.From <= b.From && t.To >= b.From && t.To < b.To
+
+ // |b.From - t.From| = 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
+ target: newSymbolNode('0'),
+ base: newRangeSymbolNode('0', '1'),
+ result: newSymbolNode('1'),
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
+ target: newSymbolNode('0'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newRangeSymbolNode('1', '2'),
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newSymbolNode('2'),
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('0', '3'),
+ result: newRangeSymbolNode('2', '3'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('1', '2'),
+ result: newSymbolNode('2'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| = 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
+ target: newRangeSymbolNode('0', '1'),
+ base: newRangeSymbolNode('1', '3'),
+ result: newRangeSymbolNode('2', '3'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| = 1
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
+ target: newRangeSymbolNode('0', '2'),
+ base: newRangeSymbolNode('1', '3'),
+ result: newSymbolNode('3'),
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.From| > 0
+ // |b.To - t.To| > 1
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
+ target: newRangeSymbolNode('0', '2'),
+ base: newRangeSymbolNode('1', '4'),
+ result: newRangeSymbolNode('3', '4'),
+ },
+
+ // t.From > b.From && t.From <= b.To && t.To >= b.To
+
+ // |t.From - b.From| = 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
+ target: newSymbolNode('1'),
+ base: newRangeSymbolNode('0', '1'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| = 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('1', '2'),
+ base: newRangeSymbolNode('0', '1'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| = 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
+ target: newRangeSymbolNode('1', '2'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| = 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('1', '3'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newSymbolNode('0'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
+ target: newSymbolNode('2'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| = 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('2', '3'),
+ base: newRangeSymbolNode('0', '2'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
+ target: newRangeSymbolNode('2', '3'),
+ base: newRangeSymbolNode('0', '3'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+ // |t.From - b.From| > 1
+ // |b.To - t.From| > 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+--+--+--+
+ // Result (b - t): +--+--+
+ {
+ caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('2', '4'),
+ base: newRangeSymbolNode('0', '3'),
+ result: newRangeSymbolNode('0', '1'),
+ },
+
+ // t.From <= b.From && t.To >= b.To
+
+ // |b.From - t.From| = 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0",
+ target: newSymbolNode('0'),
+ base: newSymbolNode('0'),
+ result: nil,
+ },
+ // |b.From - t.From| = 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('0', '1'),
+ base: newSymbolNode('0'),
+ result: nil,
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.To| = 0
+ //
+ // Target (t): +--+--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0",
+ target: newRangeSymbolNode('0', '1'),
+ base: newSymbolNode('1'),
+ result: nil,
+ },
+ // |b.From - t.From| > 0
+ // |t.To - b.To| > 0
+ //
+ // Target (t): +--+--+--+
+ // Base (b): +--+
+ // Result (b - t): N/A
+ {
+ caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0",
+ target: newRangeSymbolNode('0', '2'),
+ base: newSymbolNode('1'),
+ result: nil,
+ },
+
+ // Others
+
+ // |b.From - t.From| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| = 1",
+ target: newSymbolNode('0'),
+ base: newSymbolNode('1'),
+ result: newSymbolNode('1'),
+ },
+ // |b.From - t.From| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|b.From - t.From| > 1",
+ target: newSymbolNode('0'),
+ base: newSymbolNode('2'),
+ result: newSymbolNode('2'),
+ },
+ // |t.To - b.To| = 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.To - b.To| = 1",
+ target: newSymbolNode('1'),
+ base: newSymbolNode('0'),
+ result: newSymbolNode('0'),
+ },
+ // |t.To - b.To| > 1
+ //
+ // Target (t): +--+
+ // Base (b): +--+
+ // Result (b - t): +--+
+ {
+ caption: "|t.To - b.To| > 1",
+ target: newSymbolNode('2'),
+ base: newSymbolNode('0'),
+ result: newSymbolNode('0'),
+ },
+ } {
+ t.Run(test.caption, func(t *testing.T) {
+ r := exclude(test.target, test.base)
+ testAST(t, test.result, r)
+ })
+ }
+}
+
+func testAST(t *testing.T, expected, actual CPTree) {
+ t.Helper()
+
+ aTy := reflect.TypeOf(actual)
+ eTy := reflect.TypeOf(expected)
+ if eTy != aTy {
+ t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy)
+ }
+
+ if actual == nil {
+ return
+ }
+
+ switch e := expected.(type) {
+ case *symbolNode:
+ a := actual.(*symbolNode)
+ if a.From != e.From || a.To != e.To {
+ t.Fatalf("unexpected node: want: %+v, got: %+v", e, a)
+ }
+ }
+ eLeft, eRight := expected.children()
+ aLeft, aRight := actual.children()
+ testAST(t, eLeft, aLeft)
+ testAST(t, eRight, aRight)
+}
diff --git a/grammar/lexical/parser/tree.go b/grammar/lexical/parser/tree.go
new file mode 100644
index 0000000..3d9d197
--- /dev/null
+++ b/grammar/lexical/parser/tree.go
@@ -0,0 +1,459 @@
+package parser
+
+import (
+ "fmt"
+ "io"
+ "sort"
+
+ spec "github.com/nihei9/vartan/spec/grammar"
+)
+
+type CPRange struct {
+ From rune
+ To rune
+}
+
+type CPTree interface {
+ fmt.Stringer
+ Range() (rune, rune, bool)
+ Optional() (CPTree, bool)
+ Repeatable() (CPTree, bool)
+ Concatenation() (CPTree, CPTree, bool)
+ Alternatives() (CPTree, CPTree, bool)
+ Describe() (spec.LexKindName, []spec.LexKindName, error)
+
+ children() (CPTree, CPTree)
+ clone() CPTree
+}
+
+var (
+ _ CPTree = &rootNode{}
+ _ CPTree = &symbolNode{}
+ _ CPTree = &concatNode{}
+ _ CPTree = &altNode{}
+ _ CPTree = &quantifierNode{}
+ _ CPTree = &fragmentNode{}
+)
+
+type rootNode struct {
+ kind spec.LexKindName
+ tree CPTree
+ fragments map[spec.LexKindName][]*fragmentNode
+}
+
+func newRootNode(kind spec.LexKindName, t CPTree) *rootNode {
+ fragments := map[spec.LexKindName][]*fragmentNode{}
+ collectFragments(t, fragments)
+
+ return &rootNode{
+ kind: kind,
+ tree: t,
+ fragments: fragments,
+ }
+}
+
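+// collectFragments records every \f{kind} reference that appears in the tree
+// so that applyFragment can later substitute the fragments' definitions.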
+func collectFragments(n CPTree, fragments map[spec.LexKindName][]*fragmentNode) {
+ if n == nil {
+ return
+ }
+
+ if f, ok := n.(*fragmentNode); ok {
+ fragments[f.kind] = append(fragments[f.kind], f)
+ return
+ }
+
+ l, r := n.children()
+ collectFragments(l, fragments)
+ collectFragments(r, fragments)
+}
+
+func (n *rootNode) String() string {
+ return fmt.Sprintf("root: %v: %v fragments", n.kind, len(n.fragments))
+}
+
+func (n *rootNode) Range() (rune, rune, bool) {
+ return n.tree.Range()
+}
+
+func (n *rootNode) Optional() (CPTree, bool) {
+ return n.tree.Optional()
+}
+
+func (n *rootNode) Repeatable() (CPTree, bool) {
+ return n.tree.Repeatable()
+}
+
+func (n *rootNode) Concatenation() (CPTree, CPTree, bool) {
+ return n.tree.Concatenation()
+}
+
+func (n *rootNode) Alternatives() (CPTree, CPTree, bool) {
+ return n.tree.Alternatives()
+}
+
+func (n *rootNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
+ var frags []spec.LexKindName
+ for f := range n.fragments {
+ frags = append(frags, spec.LexKindName(f))
+ }
+ sort.Slice(frags, func(i, j int) bool {
+ return frags[i] < frags[j]
+ })
+
+ return n.kind, frags, nil
+}
+
+func (n *rootNode) children() (CPTree, CPTree) {
+ return n.tree.children()
+}
+
+func (n *rootNode) clone() CPTree {
+ return n.tree.clone()
+}
+
+func (n *rootNode) incomplete() bool {
+ return len(n.fragments) > 0
+}
+
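+// applyFragment substitutes the given fragment's tree into every \f{kind}
+// reference collected under this root. The fragment must itself be complete,
+// that is, contain no unresolved fragment references of its own.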
+func (n *rootNode) applyFragment(kind spec.LexKindName, fragment CPTree) error {
+ root, ok := fragment.(*rootNode)
+ if !ok {
+ return fmt.Errorf("applyFragment can take only *rootNode: %T", fragment)
+ }
+ if root.incomplete() {
+ return fmt.Errorf("fragment is incomplete")
+ }
+
+ fs, ok := n.fragments[kind]
+ if !ok {
+ return nil
+ }
+ for _, f := range fs {
+ f.tree = root.clone()
+ }
+ delete(n.fragments, kind)
+
+ return nil
+}
+
+type symbolNode struct {
+ CPRange
+}
+
+func newSymbolNode(cp rune) *symbolNode {
+ return &symbolNode{
+ CPRange: CPRange{
+ From: cp,
+ To: cp,
+ },
+ }
+}
+
+func newRangeSymbolNode(from, to rune) *symbolNode {
+ return &symbolNode{
+ CPRange: CPRange{
+ From: from,
+ To: to,
+ },
+ }
+}
+
+func (n *symbolNode) String() string {
+ return fmt.Sprintf("symbol: %X..%X", n.From, n.To)
+}
+
+func (n *symbolNode) Range() (rune, rune, bool) {
+ return n.From, n.To, true
+}
+
+func (n *symbolNode) Optional() (CPTree, bool) {
+ return nil, false
+}
+
+func (n *symbolNode) Repeatable() (CPTree, bool) {
+ return nil, false
+}
+
+func (n *symbolNode) Concatenation() (CPTree, CPTree, bool) {
+ return nil, nil, false
+}
+
+func (n *symbolNode) Alternatives() (CPTree, CPTree, bool) {
+ return nil, nil, false
+}
+
+func (n *symbolNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
+ return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
+}
+
+func (n *symbolNode) children() (CPTree, CPTree) {
+ return nil, nil
+}
+
+func (n *symbolNode) clone() CPTree {
+ return newRangeSymbolNode(n.From, n.To)
+}
+
+type concatNode struct {
+ left CPTree
+ right CPTree
+}
+
+func newConcatNode(left, right CPTree) *concatNode {
+ return &concatNode{
+ left: left,
+ right: right,
+ }
+}
+
+func (n *concatNode) String() string {
+ return "concat"
+}
+
+func (n *concatNode) Range() (rune, rune, bool) {
+ return 0, 0, false
+}
+
+func (n *concatNode) Optional() (CPTree, bool) {
+ return nil, false
+}
+
+func (n *concatNode) Repeatable() (CPTree, bool) {
+ return nil, false
+}
+
+func (n *concatNode) Concatenation() (CPTree, CPTree, bool) {
+ return n.left, n.right, true
+}
+
+func (n *concatNode) Alternatives() (CPTree, CPTree, bool) {
+ return nil, nil, false
+}
+
+func (n *concatNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
+ return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
+}
+
+func (n *concatNode) children() (CPTree, CPTree) {
+ return n.left, n.right
+}
+
+func (n *concatNode) clone() CPTree {
+ if n == nil {
+ return nil
+ }
+ return newConcatNode(n.left.clone(), n.right.clone())
+}
+
+type altNode struct {
+ left CPTree
+ right CPTree
+}
+
+func newAltNode(left, right CPTree) *altNode {
+ return &altNode{
+ left: left,
+ right: right,
+ }
+}
+
+func (n *altNode) String() string {
+ return "alt"
+}
+
+func (n *altNode) Range() (rune, rune, bool) {
+ return 0, 0, false
+}
+
+func (n *altNode) Optional() (CPTree, bool) {
+ return nil, false
+}
+
+func (n *altNode) Repeatable() (CPTree, bool) {
+ return nil, false
+}
+
+func (n *altNode) Concatenation() (CPTree, CPTree, bool) {
+ return nil, nil, false
+}
+
+func (n *altNode) Alternatives() (CPTree, CPTree, bool) {
+ return n.left, n.right, true
+}
+
+func (n *altNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
+ return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
+}
+
+func (n *altNode) children() (CPTree, CPTree) {
+ return n.left, n.right
+}
+
+func (n *altNode) clone() CPTree {
+ return newAltNode(n.left.clone(), n.right.clone())
+}
+
+type quantifierNode struct {
+ optional bool
+ repeatable bool
+ tree CPTree
+}
+
+func (n *quantifierNode) String() string {
+ switch {
+ case n.repeatable:
+ return "repeatable (>= 0 times)"
+ case n.optional:
+ return "optional (0 or 1 times)"
+ default:
+ return "invalid quantifier"
+ }
+}
+
+func newRepeatNode(t CPTree) *quantifierNode {
+ return &quantifierNode{
+ repeatable: true,
+ tree: t,
+ }
+}
+
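+// newRepeatOneOrMoreNode desugars t+ into the equivalent t t*, so only the
+// zero-or-more and optional quantifiers need dedicated node forms.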
+func newRepeatOneOrMoreNode(t CPTree) *concatNode {
+ return newConcatNode(
+ t,
+ &quantifierNode{
+ repeatable: true,
+ tree: t.clone(),
+ })
+}
+
+func newOptionNode(t CPTree) *quantifierNode {
+ return &quantifierNode{
+ optional: true,
+ tree: t,
+ }
+}
+
+func (n *quantifierNode) Range() (rune, rune, bool) {
+ return 0, 0, false
+}
+
+func (n *quantifierNode) Optional() (CPTree, bool) {
+ return n.tree, n.optional
+}
+
+func (n *quantifierNode) Repeatable() (CPTree, bool) {
+ return n.tree, n.repeatable
+}
+
+func (n *quantifierNode) Concatenation() (CPTree, CPTree, bool) {
+ return nil, nil, false
+}
+
+func (n *quantifierNode) Alternatives() (CPTree, CPTree, bool) {
+ return nil, nil, false
+}
+
+func (n *quantifierNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
+ return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
+}
+
+func (n *quantifierNode) children() (CPTree, CPTree) {
+ return n.tree, nil
+}
+
+func (n *quantifierNode) clone() CPTree {
+ if n.repeatable {
+ return newRepeatNode(n.tree.clone())
+ }
+ return newOptionNode(n.tree.clone())
+}
+
+type fragmentNode struct {
+ kind spec.LexKindName
+ tree CPTree
+}
+
+func newFragmentNode(kind spec.LexKindName, t CPTree) *fragmentNode {
+ return &fragmentNode{
+ kind: kind,
+ tree: t,
+ }
+}
+
+func (n *fragmentNode) String() string {
+ return fmt.Sprintf("fragment: %v", n.kind)
+}
+
+func (n *fragmentNode) Range() (rune, rune, bool) {
+ return n.tree.Range()
+}
+
+func (n *fragmentNode) Optional() (CPTree, bool) {
+ return n.tree.Optional()
+}
+
+func (n *fragmentNode) Repeatable() (CPTree, bool) {
+ return n.tree.Repeatable()
+}
+
+func (n *fragmentNode) Concatenation() (CPTree, CPTree, bool) {
+ return n.tree.Concatenation()
+}
+
+func (n *fragmentNode) Alternatives() (CPTree, CPTree, bool) {
+ return n.tree.Alternatives()
+}
+
+func (n *fragmentNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
+ return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
+}
+
+func (n *fragmentNode) children() (CPTree, CPTree) {
+ return n.tree.children()
+}
+
+func (n *fragmentNode) clone() CPTree {
+ if n.tree == nil {
+ return newFragmentNode(n.kind, nil)
+ }
+ return newFragmentNode(n.kind, n.tree.clone())
+}
+
+//nolint:unused
+func printCPTree(w io.Writer, t CPTree, ruledLine string, childRuledLinePrefix string) {
+ if t == nil {
+ return
+ }
+ fmt.Fprintf(w, "%v%v\n", ruledLine, t)
+ children := []CPTree{}
+ switch n := t.(type) {
+ case *rootNode:
+ children = append(children, n.tree)
+ case *fragmentNode:
+ children = append(children, n.tree)
+ default:
+ left, right := t.children()
+ if left != nil {
+ children = append(children, left)
+ }
+ if right != nil {
+ children = append(children, right)
+ }
+ }
+ num := len(children)
+ for i, child := range children {
+ line := "└─ "
+ if num > 1 {
+ if i == 0 {
+ line = "├─ "
+ } else if i < num-1 {
+ line = "│ "
+ }
+ }
+ prefix := "│ "
+ if i >= num-1 {
+ prefix = " "
+ }
+ printCPTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
+ }
+}
diff --git a/grammar/lr0.go b/grammar/lr0.go
index dea5254..77ad2e0 100644
--- a/grammar/lr0.go
+++ b/grammar/lr0.go
@@ -3,6 +3,8 @@ package grammar
import (
"fmt"
"sort"
+
+ "github.com/nihei9/vartan/grammar/symbol"
)
type lr0Automaton struct {
@@ -10,8 +12,8 @@ type lr0Automaton struct {
states map[kernelID]*lrState
}
-func genLR0Automaton(prods *productionSet, startSym symbol, errSym symbol) (*lr0Automaton, error) {
- if !startSym.isStart() {
+func genLR0Automaton(prods *productionSet, startSym symbol.Symbol, errSym symbol.Symbol) (*lr0Automaton, error) {
+ if !startSym.IsStart() {
return nil, fmt.Errorf("passed symbol is not a start symbol")
}
@@ -67,7 +69,7 @@ func genLR0Automaton(prods *productionSet, startSym symbol, errSym symbol) (*lr0
return automaton, nil
}
-func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol) (*lrState, []*kernel, error) {
+func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol.Symbol) (*lrState, []*kernel, error) {
items, err := genLR0Closure(k, prods)
if err != nil {
return nil, nil, err
@@ -77,7 +79,7 @@ func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol)
return nil, nil, err
}
- next := map[symbol]kernelID{}
+ next := map[symbol.Symbol]kernelID{}
kernels := []*kernel{}
for _, n := range neighbours {
next[n.symbol] = n.kernel.id
@@ -125,7 +127,7 @@ func genLR0Closure(k *kernel, prods *productionSet) ([]*lrItem, error) {
for len(uncheckedItems) > 0 {
nextUncheckedItems := []*lrItem{}
for _, item := range uncheckedItems {
- if item.dottedSymbol.isTerminal() {
+ if item.dottedSymbol.IsTerminal() {
continue
}
@@ -150,14 +152,14 @@ func genLR0Closure(k *kernel, prods *productionSet) ([]*lrItem, error) {
}
type neighbourKernel struct {
- symbol symbol
+ symbol symbol.Symbol
kernel *kernel
}
func genNeighbourKernels(items []*lrItem, prods *productionSet) ([]*neighbourKernel, error) {
- kItemMap := map[symbol][]*lrItem{}
+ kItemMap := map[symbol.Symbol][]*lrItem{}
for _, item := range items {
- if item.dottedSymbol.isNil() {
+ if item.dottedSymbol.IsNil() {
continue
}
prod, ok := prods.findByID(item.prod)
@@ -171,7 +173,7 @@ func genNeighbourKernels(items []*lrItem, prods *productionSet) ([]*neighbourKer
kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem)
}
- nextSyms := []symbol{}
+ nextSyms := []symbol.Symbol{}
for sym := range kItemMap {
nextSyms = append(nextSyms, sym)
}
diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go
index 0d0b134..99d4e5b 100644
--- a/grammar/lr0_test.go
+++ b/grammar/lr0_test.go
@@ -5,12 +5,13 @@ import (
"strings"
"testing"
- spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/grammar/symbol"
+ "github.com/nihei9/vartan/spec/grammar/parser"
)
type expectedLRState struct {
kernelItems []*lrItem
- nextStates map[symbol][]*lrItem
+ nextStates map[symbol.Symbol][]*lrItem
reducibleProds []*production
emptyProdItems []*lrItem
}
@@ -41,15 +42,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
var gram *Grammar
var automaton *lr0Automaton
{
- ast, err := spec.Parse(strings.NewReader(src))
+ ast, err := parser.Parse(strings.NewReader(src))
if err != nil {
t.Fatal(err)
}
b := GrammarBuilder{
AST: ast,
}
-
- gram, err = b.Build()
+ gram, err = b.build()
if err != nil {
t.Fatal(err)
}
@@ -118,7 +118,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
expectedStates := []*expectedLRState{
{
kernelItems: expectedKernels[0],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("expr"): expectedKernels[1],
genSym("term"): expectedKernels[2],
genSym("factor"): expectedKernels[3],
@@ -129,7 +129,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[1],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("add"): expectedKernels[6],
},
reducibleProds: []*production{
@@ -138,7 +138,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[2],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("mul"): expectedKernels[7],
},
reducibleProds: []*production{
@@ -147,14 +147,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[3],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("term", "factor"),
},
},
{
kernelItems: expectedKernels[4],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("expr"): expectedKernels[8],
genSym("term"): expectedKernels[2],
genSym("factor"): expectedKernels[3],
@@ -165,14 +165,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[5],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("factor", "id"),
},
},
{
kernelItems: expectedKernels[6],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("term"): expectedKernels[9],
genSym("factor"): expectedKernels[3],
genSym("l_paren"): expectedKernels[4],
@@ -182,7 +182,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[7],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("factor"): expectedKernels[10],
genSym("l_paren"): expectedKernels[4],
genSym("id"): expectedKernels[5],
@@ -191,7 +191,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[8],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("add"): expectedKernels[6],
genSym("r_paren"): expectedKernels[11],
},
@@ -199,7 +199,7 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[9],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("mul"): expectedKernels[7],
},
reducibleProds: []*production{
@@ -208,14 +208,14 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
},
{
kernelItems: expectedKernels[10],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("term", "term", "mul", "factor"),
},
},
{
kernelItems: expectedKernels[11],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("factor", "l_paren", "expr", "r_paren"),
},
@@ -246,7 +246,7 @@ b: "bar";
var gram *Grammar
var automaton *lr0Automaton
{
- ast, err := spec.Parse(strings.NewReader(src))
+ ast, err := parser.Parse(strings.NewReader(src))
if err != nil {
t.Fatal(err)
}
@@ -254,7 +254,7 @@ b: "bar";
b := GrammarBuilder{
AST: ast,
}
- gram, err = b.Build()
+ gram, err = b.build()
if err != nil {
t.Fatal(err)
}
@@ -298,7 +298,7 @@ b: "bar";
expectedStates := []*expectedLRState{
{
kernelItems: expectedKernels[0],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("s"): expectedKernels[1],
genSym("foo"): expectedKernels[2],
},
@@ -311,14 +311,14 @@ b: "bar";
},
{
kernelItems: expectedKernels[1],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("s'", "s"),
},
},
{
kernelItems: expectedKernels[2],
- nextStates: map[symbol][]*lrItem{
+ nextStates: map[symbol.Symbol][]*lrItem{
genSym("bar"): expectedKernels[3],
genSym("b"): expectedKernels[4],
},
@@ -331,14 +331,14 @@ b: "bar";
},
{
kernelItems: expectedKernels[3],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("s", "foo", "bar"),
},
},
{
kernelItems: expectedKernels[4],
- nextStates: map[symbol][]*lrItem{},
+ nextStates: map[symbol.Symbol][]*lrItem{},
reducibleProds: []*production{
genProd("bar", "b"),
},
diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go
index 93033a3..53f692e 100644
--- a/grammar/parsing_table.go
+++ b/grammar/parsing_table.go
@@ -4,6 +4,7 @@ import (
"fmt"
"sort"
+ "github.com/nihei9/vartan/grammar/symbol"
spec "github.com/nihei9/vartan/spec/grammar"
)
@@ -82,7 +83,7 @@ type conflict interface {
type shiftReduceConflict struct {
state stateNum
- sym symbol
+ sym symbol.Symbol
nextState stateNum
prodNum productionNum
resolvedBy conflictResolutionMethod
@@ -93,7 +94,7 @@ func (c *shiftReduceConflict) conflict() {
type reduceReduceConflict struct {
state stateNum
- sym symbol
+ sym symbol.Symbol
prodNum1 productionNum
prodNum2 productionNum
resolvedBy conflictResolutionMethod
@@ -123,12 +124,12 @@ type ParsingTable struct {
InitialState stateNum
}
-func (t *ParsingTable) getAction(state stateNum, sym symbolNum) (ActionType, stateNum, productionNum) {
+func (t *ParsingTable) getAction(state stateNum, sym symbol.SymbolNum) (ActionType, stateNum, productionNum) {
pos := state.Int()*t.terminalCount + sym.Int()
return t.actionTable[pos].describe()
}
-func (t *ParsingTable) getGoTo(state stateNum, sym symbolNum) (GoToType, stateNum) {
+func (t *ParsingTable) getGoTo(state stateNum, sym symbol.SymbolNum) (GoToType, stateNum) {
pos := state.Int()*t.nonTerminalCount + sym.Int()
return t.goToTable[pos].describe()
}
@@ -141,8 +142,8 @@ func (t *ParsingTable) writeAction(row int, col int, act actionEntry) {
t.actionTable[row*t.terminalCount+col] = act
}
-func (t *ParsingTable) writeGoTo(state stateNum, sym symbol, nextState stateNum) {
- pos := state.Int()*t.nonTerminalCount + sym.num().Int()
+func (t *ParsingTable) writeGoTo(state stateNum, sym symbol.Symbol, nextState stateNum) {
+ pos := state.Int()*t.nonTerminalCount + sym.Num().Int()
t.goToTable[pos] = newGoToEntry(nextState)
}
@@ -151,7 +152,7 @@ type lrTableBuilder struct {
prods *productionSet
termCount int
nonTermCount int
- symTab *symbolTableReader
+ symTab *symbol.SymbolTableReader
precAndAssoc *precAndAssoc
conflicts []conflict
@@ -179,7 +180,7 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) {
for sym, kID := range state.next {
nextState := b.automaton.states[kID]
- if sym.isTerminal() {
+ if sym.IsTerminal() {
b.writeShiftAction(ptab, state.num, sym, nextState.num)
} else {
ptab.writeGoTo(state.num, sym, nextState.num)
@@ -226,12 +227,12 @@ func (b *lrTableBuilder) build() (*ParsingTable, error) {
// writeShiftAction writes a shift action to the parsing table. When a shift/reduce conflict occurs,
// we prioritize the shift action.
-func (b *lrTableBuilder) writeShiftAction(tab *ParsingTable, state stateNum, sym symbol, nextState stateNum) {
- act := tab.readAction(state.Int(), sym.num().Int())
+func (b *lrTableBuilder) writeShiftAction(tab *ParsingTable, state stateNum, sym symbol.Symbol, nextState stateNum) {
+ act := tab.readAction(state.Int(), sym.Num().Int())
if !act.isEmpty() {
ty, _, p := act.describe()
if ty == ActionTypeReduce {
- act, method := b.resolveSRConflict(sym.num(), p)
+ act, method := b.resolveSRConflict(sym.Num(), p)
b.conflicts = append(b.conflicts, &shiftReduceConflict{
state: state,
sym: sym,
@@ -240,19 +241,19 @@ func (b *lrTableBuilder) writeShiftAction(tab *ParsingTable, state stateNum, sym
resolvedBy: method,
})
if act == ActionTypeShift {
- tab.writeAction(state.Int(), sym.num().Int(), newShiftActionEntry(nextState))
+ tab.writeAction(state.Int(), sym.Num().Int(), newShiftActionEntry(nextState))
}
return
}
}
- tab.writeAction(state.Int(), sym.num().Int(), newShiftActionEntry(nextState))
+ tab.writeAction(state.Int(), sym.Num().Int(), newShiftActionEntry(nextState))
}
// writeReduceAction writes a reduce action to the parsing table. When a shift/reduce conflict occurs,
// we prioritize the shift action, and when a reduce/reduce conflict occurs, we prioritize the action that
// reduces the production with the higher priority. Productions defined earlier in the grammar file have a higher priority.
-func (b *lrTableBuilder) writeReduceAction(tab *ParsingTable, state stateNum, sym symbol, prod productionNum) {
- act := tab.readAction(state.Int(), sym.num().Int())
+func (b *lrTableBuilder) writeReduceAction(tab *ParsingTable, state stateNum, sym symbol.Symbol, prod productionNum) {
+ act := tab.readAction(state.Int(), sym.Num().Int())
if !act.isEmpty() {
ty, s, p := act.describe()
switch ty {
@@ -269,12 +270,12 @@ func (b *lrTableBuilder) writeReduceAction(tab *ParsingTable, state stateNum, sy
resolvedBy: ResolvedByProdOrder,
})
if p < prod {
- tab.writeAction(state.Int(), sym.num().Int(), newReduceActionEntry(p))
+ tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(p))
} else {
- tab.writeAction(state.Int(), sym.num().Int(), newReduceActionEntry(prod))
+ tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod))
}
case ActionTypeShift:
- act, method := b.resolveSRConflict(sym.num(), prod)
+ act, method := b.resolveSRConflict(sym.Num(), prod)
b.conflicts = append(b.conflicts, &shiftReduceConflict{
state: state,
sym: sym,
@@ -283,15 +284,15 @@ func (b *lrTableBuilder) writeReduceAction(tab *ParsingTable, state stateNum, sy
resolvedBy: method,
})
if act == ActionTypeReduce {
- tab.writeAction(state.Int(), sym.num().Int(), newReduceActionEntry(prod))
+ tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod))
}
}
return
}
- tab.writeAction(state.Int(), sym.num().Int(), newReduceActionEntry(prod))
+ tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod))
}
-func (b *lrTableBuilder) resolveSRConflict(sym symbolNum, prod productionNum) (ActionType, conflictResolutionMethod) {
+func (b *lrTableBuilder) resolveSRConflict(sym symbol.SymbolNum, prod productionNum) (ActionType, conflictResolutionMethod) {
symPrec := b.precAndAssoc.terminalPrecedence(sym)
prodPrec := b.precAndAssoc.productionPredence(prod)
if symPrec == 0 || prodPrec == 0 {
@@ -313,26 +314,26 @@ func (b *lrTableBuilder) resolveSRConflict(sym symbolNum, prod productionNum) (A
func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Report, error) {
var terms []*spec.Terminal
{
- termSyms := b.symTab.terminalSymbols()
+ termSyms := b.symTab.TerminalSymbols()
terms = make([]*spec.Terminal, len(termSyms)+1)
for _, sym := range termSyms {
- name, ok := b.symTab.toText(sym)
+ name, ok := b.symTab.ToText(sym)
if !ok {
return nil, fmt.Errorf("failed to generate terminals: symbol not found: %v", sym)
}
term := &spec.Terminal{
- Number: sym.num().Int(),
+ Number: sym.Num().Int(),
Name: name,
}
- prec := b.precAndAssoc.terminalPrecedence(sym.num())
+ prec := b.precAndAssoc.terminalPrecedence(sym.Num())
if prec != precNil {
term.Precedence = prec
}
- assoc := b.precAndAssoc.terminalAssociativity(sym.num())
+ assoc := b.precAndAssoc.terminalAssociativity(sym.Num())
switch assoc {
case assocTypeLeft:
term.Associativity = "l"
@@ -340,22 +341,22 @@ func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Repo
term.Associativity = "r"
}
- terms[sym.num()] = term
+ terms[sym.Num()] = term
}
}
var nonTerms []*spec.NonTerminal
{
- nonTermSyms := b.symTab.nonTerminalSymbols()
+ nonTermSyms := b.symTab.NonTerminalSymbols()
nonTerms = make([]*spec.NonTerminal, len(nonTermSyms)+1)
for _, sym := range nonTermSyms {
- name, ok := b.symTab.toText(sym)
+ name, ok := b.symTab.ToText(sym)
if !ok {
return nil, fmt.Errorf("failed to generate non-terminals: symbol not found: %v", sym)
}
- nonTerms[sym.num()] = &spec.NonTerminal{
- Number: sym.num().Int(),
+ nonTerms[sym.Num()] = &spec.NonTerminal{
+ Number: sym.Num().Int(),
Name: name,
}
}
@@ -368,16 +369,16 @@ func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Repo
for _, p := range ps {
rhs := make([]int, len(p.rhs))
for i, e := range p.rhs {
- if e.isTerminal() {
- rhs[i] = e.num().Int()
+ if e.IsTerminal() {
+ rhs[i] = e.Num().Int()
} else {
- rhs[i] = e.num().Int() * -1
+ rhs[i] = e.Num().Int() * -1
}
}
prod := &spec.Production{
Number: p.num.Int(),
- LHS: p.lhs.num().Int(),
+ LHS: p.lhs.Num().Int(),
RHS: rhs,
}
@@ -441,33 +442,33 @@ func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Repo
var goTo []*spec.Transition
{
TERMINALS_LOOP:
- for _, t := range b.symTab.terminalSymbols() {
- act, next, prod := tab.getAction(s.num, t.num())
+ for _, t := range b.symTab.TerminalSymbols() {
+ act, next, prod := tab.getAction(s.num, t.Num())
switch act {
case ActionTypeShift:
shift = append(shift, &spec.Transition{
- Symbol: t.num().Int(),
+ Symbol: t.Num().Int(),
State: next.Int(),
})
case ActionTypeReduce:
for _, r := range reduce {
if r.Production == prod.Int() {
- r.LookAhead = append(r.LookAhead, t.num().Int())
+ r.LookAhead = append(r.LookAhead, t.Num().Int())
continue TERMINALS_LOOP
}
}
reduce = append(reduce, &spec.Reduce{
- LookAhead: []int{t.num().Int()},
+ LookAhead: []int{t.Num().Int()},
Production: prod.Int(),
})
}
}
- for _, n := range b.symTab.nonTerminalSymbols() {
- ty, next := tab.getGoTo(s.num, n.num())
+ for _, n := range b.symTab.NonTerminalSymbols() {
+ ty, next := tab.getGoTo(s.num, n.Num())
if ty == GoToTypeRegistered {
goTo = append(goTo, &spec.Transition{
- Symbol: n.num().Int(),
+ Symbol: n.Num().Int(),
State: next.Int(),
})
}
@@ -489,13 +490,13 @@ func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Repo
{
for _, c := range srConflicts[s.num] {
conflict := &spec.SRConflict{
- Symbol: c.sym.num().Int(),
+ Symbol: c.sym.Num().Int(),
State: c.nextState.Int(),
Production: c.prodNum.Int(),
ResolvedBy: c.resolvedBy.Int(),
}
- ty, s, p := tab.getAction(s.num, c.sym.num())
+ ty, s, p := tab.getAction(s.num, c.sym.Num())
switch ty {
case ActionTypeShift:
n := s.Int()
@@ -514,13 +515,13 @@ func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Repo
for _, c := range rrConflicts[s.num] {
conflict := &spec.RRConflict{
- Symbol: c.sym.num().Int(),
+ Symbol: c.sym.Num().Int(),
Production1: c.prodNum1.Int(),
Production2: c.prodNum2.Int(),
ResolvedBy: c.resolvedBy.Int(),
}
- _, _, p := tab.getAction(s.num, c.sym.num())
+ _, _, p := tab.getAction(s.num, c.sym.Num())
conflict.AdoptedProduction = p.Int()
rr = append(rr, conflict)
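Aside: getAction, getGoTo, and the write helpers above all address their tables in row-major order: one row per state, one column per terminal (or non-terminal), flattened into a single slice via state*terminalCount + symbolNumber. A self-contained sketch of that addressing scheme (the sizes and entry values are made up for illustration):

    package main

    import "fmt"

    func main() {
        const states, terminals = 3, 4 // hypothetical sizes
        actionTable := make([]int, states*terminals)

        write := func(state, sym, entry int) {
            actionTable[state*terminals+sym] = entry // row = state, column = terminal
        }
        read := func(state, sym int) int {
            return actionTable[state*terminals+sym]
        }

        write(2, 1, 42)         // e.g. a shift entry for state 2 on terminal 1
        fmt.Println(read(2, 1)) // 42
        fmt.Println(read(2, 3)) // 0: the zero value plays the role of the empty entry
    }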
diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go
index fe56722..ae829e6 100644
--- a/grammar/parsing_table_test.go
+++ b/grammar/parsing_table_test.go
@@ -5,13 +5,14 @@ import (
"strings"
"testing"
- spec "github.com/nihei9/vartan/spec/grammar"
+ "github.com/nihei9/vartan/grammar/symbol"
+ "github.com/nihei9/vartan/spec/grammar/parser"
)
type expectedState struct {
kernelItems []*lrItem
- acts map[symbol]testActionEntry
- goTos map[symbol][]*lrItem
+ acts map[symbol.Symbol]testActionEntry
+ goTos map[symbol.Symbol][]*lrItem
}
func TestGenLALRParsingTable(t *testing.T) {
@@ -32,14 +33,14 @@ id: "[A-Za-z0-9_]+";
var nonTermCount int
var termCount int
{
- ast, err := spec.Parse(strings.NewReader(src))
+ ast, err := parser.Parse(strings.NewReader(src))
if err != nil {
t.Fatal(err)
}
b := GrammarBuilder{
AST: ast,
}
- gram, err = b.Build()
+ gram, err = b.build()
if err != nil {
t.Fatal(err)
}
@@ -56,11 +57,11 @@ id: "[A-Za-z0-9_]+";
t.Fatal(err)
}
- nonTermTexts, err := gram.symbolTable.nonTerminalTexts()
+ nonTermTexts, err := gram.symbolTable.NonTerminalTexts()
if err != nil {
t.Fatal(err)
}
- termTexts, err := gram.symbolTable.terminalTexts()
+ termTexts, err := gram.symbolTable.TerminalTexts()
if err != nil {
t.Fatal(err)
}
@@ -89,42 +90,42 @@ id: "[A-Za-z0-9_]+";
expectedKernels := map[int][]*lrItem{
0: {
- withLookAhead(genLR0Item("s'", 0, "s"), symbolEOF),
+ withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
},
1: {
- withLookAhead(genLR0Item("s'", 1, "s"), symbolEOF),
+ withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
},
2: {
- withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbolEOF),
- withLookAhead(genLR0Item("r", 1, "l"), symbolEOF),
+ withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
},
3: {
- withLookAhead(genLR0Item("s", 1, "r"), symbolEOF),
+ withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
},
4: {
- withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
},
5: {
- withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
},
6: {
- withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbolEOF),
+ withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
},
7: {
- withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
},
8: {
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbolEOF),
+ withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
},
9: {
- withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbolEOF),
+ withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
},
}
expectedStates := []expectedState{
{
kernelItems: expectedKernels[0],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("ref"): {
ty: ActionTypeShift,
nextState: expectedKernels[4],
@@ -134,7 +135,7 @@ id: "[A-Za-z0-9_]+";
nextState: expectedKernels[5],
},
},
- goTos: map[symbol][]*lrItem{
+ goTos: map[symbol.Symbol][]*lrItem{
genSym("s"): expectedKernels[1],
genSym("l"): expectedKernels[2],
genSym("r"): expectedKernels[3],
@@ -142,8 +143,8 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[1],
- acts: map[symbol]testActionEntry{
- symbolEOF: {
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("s'", "s"),
},
@@ -151,12 +152,12 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[2],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("eq"): {
ty: ActionTypeShift,
nextState: expectedKernels[6],
},
- symbolEOF: {
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("r", "l"),
},
@@ -164,8 +165,8 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[3],
- acts: map[symbol]testActionEntry{
- symbolEOF: {
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("s", "r"),
},
@@ -173,7 +174,7 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[4],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("ref"): {
ty: ActionTypeShift,
nextState: expectedKernels[4],
@@ -183,19 +184,19 @@ id: "[A-Za-z0-9_]+";
nextState: expectedKernels[5],
},
},
- goTos: map[symbol][]*lrItem{
+ goTos: map[symbol.Symbol][]*lrItem{
genSym("r"): expectedKernels[7],
genSym("l"): expectedKernels[8],
},
},
{
kernelItems: expectedKernels[5],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("eq"): {
ty: ActionTypeReduce,
production: genProd("l", "id"),
},
- symbolEOF: {
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("l", "id"),
},
@@ -203,7 +204,7 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[6],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("ref"): {
ty: ActionTypeShift,
nextState: expectedKernels[4],
@@ -213,19 +214,19 @@ id: "[A-Za-z0-9_]+";
nextState: expectedKernels[5],
},
},
- goTos: map[symbol][]*lrItem{
+ goTos: map[symbol.Symbol][]*lrItem{
genSym("l"): expectedKernels[8],
genSym("r"): expectedKernels[9],
},
},
{
kernelItems: expectedKernels[7],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("eq"): {
ty: ActionTypeReduce,
production: genProd("l", "ref", "r"),
},
- symbolEOF: {
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("l", "ref", "r"),
},
@@ -233,12 +234,12 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[8],
- acts: map[symbol]testActionEntry{
+ acts: map[symbol.Symbol]testActionEntry{
genSym("eq"): {
ty: ActionTypeReduce,
production: genProd("r", "l"),
},
- symbolEOF: {
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("r", "l"),
},
@@ -246,8 +247,8 @@ id: "[A-Za-z0-9_]+";
},
{
kernelItems: expectedKernels[9],
- acts: map[symbol]testActionEntry{
- symbolEOF: {
+ acts: map[symbol.Symbol]testActionEntry{
+ symbol.SymbolEOF: {
ty: ActionTypeReduce,
production: genProd("s", "l", "eq", "r"),
},
@@ -287,11 +288,11 @@ id: "[A-Za-z0-9_]+";
}
func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) {
- nonEmptyEntries := map[symbolNum]struct{}{}
+ nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
for eSym, eAct := range expectedState.acts {
- nonEmptyEntries[eSym.num()] = struct{}{}
+ nonEmptyEntries[eSym.Num()] = struct{}{}
- ty, stateNum, prodNum := ptab.getAction(state.num, eSym.num())
+ ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num())
if ty != eAct.ty {
t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
}
@@ -319,10 +320,10 @@ func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab
}
}
for symNum := 0; symNum < termCount; symNum++ {
- if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked {
+ if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
continue
}
- ty, stateNum, prodNum := ptab.getAction(state.num, symbolNum(symNum))
+ ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum))
if ty != ActionTypeError {
t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum)
}
@@ -330,15 +331,15 @@ func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab
}
func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) {
- nonEmptyEntries := map[symbolNum]struct{}{}
+ nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
for eSym, eGoTo := range expectedState.goTos {
- nonEmptyEntries[eSym.num()] = struct{}{}
+ nonEmptyEntries[eSym.Num()] = struct{}{}
eNextState, err := newKernel(eGoTo)
if err != nil {
t.Fatal(err)
}
- ty, stateNum := ptab.getGoTo(state.num, eSym.num())
+ ty, stateNum := ptab.getGoTo(state.num, eSym.Num())
if ty != GoToTypeRegistered {
t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
}
@@ -351,10 +352,10 @@ func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *
}
}
for symNum := 0; symNum < nonTermCount; symNum++ {
- if _, checked := nonEmptyEntries[symbolNum(symNum)]; checked {
+ if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
continue
}
- ty, _ := ptab.getGoTo(state.num, symbolNum(symNum))
+ ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum))
if ty != GoToTypeError {
t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
}
diff --git a/grammar/production.go b/grammar/production.go
index 87b392f..1978039 100644
--- a/grammar/production.go
+++ b/grammar/production.go
@@ -4,6 +4,8 @@ import (
"crypto/sha256"
"encoding/hex"
"fmt"
+
+ "github.com/nihei9/vartan/grammar/symbol"
)
type productionID [32]byte
@@ -12,10 +14,10 @@ func (id productionID) String() string {
return hex.EncodeToString(id[:])
}
-func genProductionID(lhs symbol, rhs []symbol) productionID {
- seq := lhs.byte()
+func genProductionID(lhs symbol.Symbol, rhs []symbol.Symbol) productionID {
+ seq := lhs.Byte()
for _, sym := range rhs {
- seq = append(seq, sym.byte()...)
+ seq = append(seq, sym.Byte()...)
}
return productionID(sha256.Sum256(seq))
}
@@ -35,17 +37,17 @@ func (n productionNum) Int() int {
type production struct {
id productionID
num productionNum
- lhs symbol
- rhs []symbol
+ lhs symbol.Symbol
+ rhs []symbol.Symbol
rhsLen int
}
-func newProduction(lhs symbol, rhs []symbol) (*production, error) {
- if lhs.isNil() {
+func newProduction(lhs symbol.Symbol, rhs []symbol.Symbol) (*production, error) {
+ if lhs.IsNil() {
return nil, fmt.Errorf("LHS must be a non-nil symbol; LHS: %v, RHS: %v", lhs, rhs)
}
for _, sym := range rhs {
- if sym.isNil() {
+ if sym.IsNil() {
return nil, fmt.Errorf("a symbol of RHS must be a non-nil symbol; LHS: %v, RHS: %v", lhs, rhs)
}
}
@@ -63,14 +65,14 @@ func (p *production) isEmpty() bool {
}
type productionSet struct {
- lhs2Prods map[symbol][]*production
+ lhs2Prods map[symbol.Symbol][]*production
id2Prod map[productionID]*production
num productionNum
}
func newProductionSet() *productionSet {
return &productionSet{
- lhs2Prods: map[symbol][]*production{},
+ lhs2Prods: map[symbol.Symbol][]*production{},
id2Prod: map[productionID]*production{},
num: productionNumMin,
}
@@ -81,7 +83,7 @@ func (ps *productionSet) append(prod *production) {
return
}
- if prod.lhs.isStart() {
+ if prod.lhs.IsStart() {
prod.num = productionNumStart
} else {
prod.num = ps.num
@@ -101,8 +103,8 @@ func (ps *productionSet) findByID(id productionID) (*production, bool) {
return prod, ok
}
-func (ps *productionSet) findByLHS(lhs symbol) ([]*production, bool) {
- if lhs.isNil() {
+func (ps *productionSet) findByLHS(lhs symbol.Symbol) ([]*production, bool) {
+ if lhs.IsNil() {
return nil, false
}
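Aside: genProductionID above concatenates the two-byte encodings of the LHS and the RHS symbols and hashes them with SHA-256, so two productions receive the same ID exactly when their symbol sequences match; productionSet relies on this via id2Prod for duplicate detection. A standalone sketch with assumed 16-bit symbol values:

    package main

    import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
    )

    // id mimics the hashing scheme: big-endian two-byte encoding of
    // each symbol, LHS first, then the RHS symbols in order.
    func id(lhs uint16, rhs ...uint16) string {
        seq := []byte{byte(lhs >> 8), byte(lhs & 0x00ff)}
        for _, s := range rhs {
            seq = append(seq, byte(s>>8), byte(s&0x00ff))
        }
        sum := sha256.Sum256(seq)
        return hex.EncodeToString(sum[:])
    }

    func main() {
        fmt.Println(id(0x4002, 0x8002, 0x8003) == id(0x4002, 0x8002, 0x8003)) // true: same production
        fmt.Println(id(0x4002, 0x8002, 0x8003) == id(0x4002, 0x8003, 0x8002)) // false: RHS order matters
    }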
diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go
index 589e324..88a6b17 100644
--- a/grammar/semantic_error.go
+++ b/grammar/semantic_error.go
@@ -1,42 +1,30 @@
package grammar
-type SemanticError struct {
- message string
-}
-
-func newSemanticError(message string) *SemanticError {
- return &SemanticError{
- message: message,
- }
-}
-
-func (e *SemanticError) Error() string {
- return e.message
-}
+import "errors"
var (
- semErrNoGrammarName = newSemanticError("name is missing")
- semErrSpellingInconsistency = newSemanticError("the identifiers are treated as the same. please use the same spelling")
- semErrDuplicateAssoc = newSemanticError("associativity and precedence cannot be specified multiple times for a symbol")
- semErrUndefinedPrec = newSemanticError("symbol must has precedence")
- semErrUndefinedOrdSym = newSemanticError("undefined ordered symbol")
- semErrUnusedProduction = newSemanticError("unused production")
- semErrUnusedTerminal = newSemanticError("unused terminal")
- semErrTermCannotBeSkipped = newSemanticError("a terminal used in productions cannot be skipped")
- semErrNoProduction = newSemanticError("a grammar needs at least one production")
- semErrUndefinedSym = newSemanticError("undefined symbol")
- semErrDuplicateProduction = newSemanticError("duplicate production")
- semErrDuplicateTerminal = newSemanticError("duplicate terminal")
- semErrDuplicateFragment = newSemanticError("duplicate fragment")
- semErrDuplicateName = newSemanticError("duplicate names are not allowed between terminals and non-terminals")
- semErrErrSymIsReserved = newSemanticError("symbol 'error' is reserved as a terminal symbol")
- semErrDuplicateLabel = newSemanticError("a label must be unique in an alternative")
- semErrInvalidLabel = newSemanticError("a label must differ from terminal symbols or non-terminal symbols")
- semErrDirInvalidName = newSemanticError("invalid directive name")
- semErrDirInvalidParam = newSemanticError("invalid parameter")
- semErrDuplicateDir = newSemanticError("a directive must not be duplicated")
- semErrDuplicateElem = newSemanticError("duplicate element")
- semErrAmbiguousElem = newSemanticError("ambiguous element")
- semErrInvalidProdDir = newSemanticError("invalid production directive")
- semErrInvalidAltDir = newSemanticError("invalid alternative directive")
+ semErrNoGrammarName = errors.New("name is missing")
+ semErrSpellingInconsistency = errors.New("the identifiers are treated as the same. please use the same spelling")
+ semErrDuplicateAssoc = errors.New("associativity and precedence cannot be specified multiple times for a symbol")
+ semErrUndefinedPrec = errors.New("symbol must have precedence")
+ semErrUndefinedOrdSym = errors.New("undefined ordered symbol")
+ semErrUnusedProduction = errors.New("unused production")
+ semErrUnusedTerminal = errors.New("unused terminal")
+ semErrTermCannotBeSkipped = errors.New("a terminal used in productions cannot be skipped")
+ semErrNoProduction = errors.New("a grammar needs at least one production")
+ semErrUndefinedSym = errors.New("undefined symbol")
+ semErrDuplicateProduction = errors.New("duplicate production")
+ semErrDuplicateTerminal = errors.New("duplicate terminal")
+ semErrDuplicateFragment = errors.New("duplicate fragment")
+ semErrDuplicateName = errors.New("duplicate names are not allowed between terminals and non-terminals")
+ semErrErrSymIsReserved = errors.New("symbol 'error' is reserved as a terminal symbol")
+ semErrDuplicateLabel = errors.New("a label must be unique in an alternative")
+ semErrInvalidLabel = errors.New("a label must differ from terminal symbols or non-terminal symbols")
+ semErrDirInvalidName = errors.New("invalid directive name")
+ semErrDirInvalidParam = errors.New("invalid parameter")
+ semErrDuplicateDir = errors.New("a directive must not be duplicated")
+ semErrDuplicateElem = errors.New("duplicate element")
+ semErrAmbiguousElem = errors.New("ambiguous element")
+ semErrInvalidProdDir = errors.New("invalid production directive")
+ semErrInvalidAltDir = errors.New("invalid alternative directive")
)
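Aside: replacing the SemanticError wrapper type with errors.New sentinels keeps the messages identical but lets callers match them with errors.Is, even after the error has been wrapped with additional context. A minimal sketch (the wrapping call site is illustrative, not taken from this change):

    package main

    import (
        "errors"
        "fmt"
    )

    var semErrUndefinedSym = errors.New("undefined symbol")

    func analyze() error {
        // Adding context with %w keeps the sentinel matchable.
        return fmt.Errorf("production 'expr': %w", semErrUndefinedSym)
    }

    func main() {
        err := analyze()
        fmt.Println(errors.Is(err, semErrUndefinedSym)) // true
    }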
diff --git a/grammar/symbol.go b/grammar/symbol/symbol.go
index 9eba032..f9e6a93 100644
--- a/grammar/symbol.go
+++ b/grammar/symbol/symbol.go
@@ -1,4 +1,4 @@
-package grammar
+package symbol
import (
"fmt"
@@ -16,15 +16,15 @@ func (t symbolKind) String() string {
return string(t)
}
-type symbolNum uint16
+type SymbolNum uint16
-func (n symbolNum) Int() int {
+func (n SymbolNum) Int() int {
return int(n)
}
-type symbol uint16
+type Symbol uint16
-func (s symbol) String() string {
+func (s Symbol) String() string {
kind, isStart, isEOF, num := s.describe()
var prefix string
switch {
@@ -56,24 +56,24 @@ const (
symbolNumStart = uint16(0x0001) // 0000 0000 0000 0001
symbolNumEOF = uint16(0x0001) // 0000 0000 0000 0001
- symbolNil = symbol(0) // 0000 0000 0000 0000
- symbolStart = symbol(maskNonTerminal | maskStartOrEOF | symbolNumStart) // 0100 0000 0000 0001
- symbolEOF = symbol(maskTerminal | maskStartOrEOF | symbolNumEOF) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol.
+ SymbolNil = Symbol(0) // 0000 0000 0000 0000
+ symbolStart = Symbol(maskNonTerminal | maskStartOrEOF | symbolNumStart) // 0100 0000 0000 0001
+ SymbolEOF = Symbol(maskTerminal | maskStartOrEOF | symbolNumEOF) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol.
// The symbol name contains `<` and `>` to avoid conflicting with user-defined symbols.
symbolNameEOF = "<eof>"
- nonTerminalNumMin = symbolNum(2) // The number 1 is used by a start symbol.
- terminalNumMin = symbolNum(2) // The number 1 is used by the EOF symbol.
- symbolNumMax = symbolNum(0xffff) >> 2 // 0011 1111 1111 1111
+ nonTerminalNumMin = SymbolNum(2) // The number 1 is used by a start symbol.
+ terminalNumMin = SymbolNum(2) // The number 1 is used by the EOF symbol.
+ symbolNumMax = SymbolNum(0xffff) >> 2 // 0011 1111 1111 1111
)
-func newSymbol(kind symbolKind, isStart bool, num symbolNum) (symbol, error) {
+func newSymbol(kind symbolKind, isStart bool, num SymbolNum) (Symbol, error) {
if num > symbolNumMax {
- return symbolNil, fmt.Errorf("a symbol number exceeds the limit; limit: %v, passed: %v", symbolNumMax, num)
+ return SymbolNil, fmt.Errorf("a symbol number exceeds the limit; limit: %v, passed: %v", symbolNumMax, num)
}
if kind == symbolKindTerminal && isStart {
- return symbolNil, fmt.Errorf("a start symbol must be a non-terminal symbol")
+ return SymbolNil, fmt.Errorf("a start symbol must be a non-terminal symbol")
}
kindMask := maskNonTerminal
@@ -84,58 +84,58 @@ func newSymbol(kind symbolKind, isStart bool, num symbolNum) (symbol, error) {
if isStart {
startMask = maskStartOrEOF
}
- return symbol(kindMask | startMask | uint16(num)), nil
+ return Symbol(kindMask | startMask | uint16(num)), nil
}
-func (s symbol) num() symbolNum {
+func (s Symbol) Num() SymbolNum {
_, _, _, num := s.describe()
return num
}
-func (s symbol) byte() []byte {
- if s.isNil() {
+func (s Symbol) Byte() []byte {
+ if s.IsNil() {
return []byte{0, 0}
}
return []byte{byte(uint16(s) >> 8), byte(uint16(s) & 0x00ff)}
}
-func (s symbol) isNil() bool {
+func (s Symbol) IsNil() bool {
_, _, _, num := s.describe()
return num == 0
}
-func (s symbol) isStart() bool {
- if s.isNil() {
+func (s Symbol) IsStart() bool {
+ if s.IsNil() {
return false
}
_, isStart, _, _ := s.describe()
return isStart
}
-func (s symbol) isEOF() bool {
- if s.isNil() {
+func (s Symbol) isEOF() bool {
+ if s.IsNil() {
return false
}
_, _, isEOF, _ := s.describe()
return isEOF
}
-func (s symbol) isNonTerminal() bool {
- if s.isNil() {
+func (s Symbol) isNonTerminal() bool {
+ if s.IsNil() {
return false
}
kind, _, _, _ := s.describe()
return kind == symbolKindNonTerminal
}
-func (s symbol) isTerminal() bool {
- if s.isNil() {
+func (s Symbol) IsTerminal() bool {
+ if s.IsNil() {
return false
}
return !s.isNonTerminal()
}
-func (s symbol) describe() (symbolKind, bool, bool, symbolNum) {
+func (s Symbol) describe() (symbolKind, bool, bool, SymbolNum) {
kind := symbolKindNonTerminal
if uint16(s)&maskKindPart > 0 {
kind = symbolKindTerminal
@@ -149,34 +149,34 @@ func (s symbol) describe() (symbolKind, bool, bool, symbolNum) {
isEOF = true
}
}
- num := symbolNum(uint16(s) & maskNumberPart)
+ num := SymbolNum(uint16(s) & maskNumberPart)
return kind, isStart, isEOF, num
}
-type symbolTable struct {
- text2Sym map[string]symbol
- sym2Text map[symbol]string
+type SymbolTable struct {
+ text2Sym map[string]Symbol
+ sym2Text map[Symbol]string
nonTermTexts []string
termTexts []string
- nonTermNum symbolNum
- termNum symbolNum
+ nonTermNum SymbolNum
+ termNum SymbolNum
}
-type symbolTableWriter struct {
- *symbolTable
+type SymbolTableWriter struct {
+ *SymbolTable
}
-type symbolTableReader struct {
- *symbolTable
+type SymbolTableReader struct {
+ *SymbolTable
}
-func newSymbolTable() *symbolTable {
- return &symbolTable{
- text2Sym: map[string]symbol{
- symbolNameEOF: symbolEOF,
+func NewSymbolTable() *SymbolTable {
+ return &SymbolTable{
+ text2Sym: map[string]Symbol{
+ symbolNameEOF: SymbolEOF,
},
- sym2Text: map[symbol]string{
- symbolEOF: symbolNameEOF,
+ sym2Text: map[Symbol]string{
+ SymbolEOF: symbolNameEOF,
},
termTexts: []string{
"", // Nil
@@ -191,32 +191,32 @@ func newSymbolTable() *symbolTable {
}
}
-func (t *symbolTable) writer() *symbolTableWriter {
- return &symbolTableWriter{
- symbolTable: t,
+func (t *SymbolTable) Writer() *SymbolTableWriter {
+ return &SymbolTableWriter{
+ SymbolTable: t,
}
}
-func (t *symbolTable) reader() *symbolTableReader {
- return &symbolTableReader{
- symbolTable: t,
+func (t *SymbolTable) Reader() *SymbolTableReader {
+ return &SymbolTableReader{
+ SymbolTable: t,
}
}
-func (w *symbolTableWriter) registerStartSymbol(text string) (symbol, error) {
+func (w *SymbolTableWriter) RegisterStartSymbol(text string) (Symbol, error) {
w.text2Sym[text] = symbolStart
w.sym2Text[symbolStart] = text
- w.nonTermTexts[symbolStart.num().Int()] = text
+ w.nonTermTexts[symbolStart.Num().Int()] = text
return symbolStart, nil
}
-func (w *symbolTableWriter) registerNonTerminalSymbol(text string) (symbol, error) {
+func (w *SymbolTableWriter) RegisterNonTerminalSymbol(text string) (Symbol, error) {
if sym, ok := w.text2Sym[text]; ok {
return sym, nil
}
sym, err := newSymbol(symbolKindNonTerminal, false, w.nonTermNum)
if err != nil {
- return symbolNil, err
+ return SymbolNil, err
}
w.nonTermNum++
w.text2Sym[text] = sym
@@ -225,13 +225,13 @@ func (w *symbolTableWriter) registerNonTerminalSymbol(text string) (symbol, erro
return sym, nil
}
-func (w *symbolTableWriter) registerTerminalSymbol(text string) (symbol, error) {
+func (w *SymbolTableWriter) RegisterTerminalSymbol(text string) (Symbol, error) {
if sym, ok := w.text2Sym[text]; ok {
return sym, nil
}
sym, err := newSymbol(symbolKindTerminal, false, w.termNum)
if err != nil {
- return symbolNil, err
+ return SymbolNil, err
}
w.termNum++
w.text2Sym[text] = sym
@@ -240,22 +240,22 @@ func (w *symbolTableWriter) registerTerminalSymbol(text string) (symbol, error)
return sym, nil
}
-func (r *symbolTableReader) toSymbol(text string) (symbol, bool) {
+func (r *SymbolTableReader) ToSymbol(text string) (Symbol, bool) {
if sym, ok := r.text2Sym[text]; ok {
return sym, true
}
- return symbolNil, false
+ return SymbolNil, false
}
-func (r *symbolTableReader) toText(sym symbol) (string, bool) {
+func (r *SymbolTableReader) ToText(sym Symbol) (string, bool) {
text, ok := r.sym2Text[sym]
return text, ok
}
-func (r *symbolTableReader) terminalSymbols() []symbol {
- syms := make([]symbol, 0, r.termNum.Int()-terminalNumMin.Int())
+func (r *SymbolTableReader) TerminalSymbols() []Symbol {
+ syms := make([]Symbol, 0, r.termNum.Int()-terminalNumMin.Int())
for sym := range r.sym2Text {
- if !sym.isTerminal() || sym.isNil() {
+ if !sym.IsTerminal() || sym.IsNil() {
continue
}
syms = append(syms, sym)
@@ -266,17 +266,17 @@ func (r *symbolTableReader) terminalSymbols() []symbol {
return syms
}
-func (r *symbolTableReader) terminalTexts() ([]string, error) {
+func (r *SymbolTableReader) TerminalTexts() ([]string, error) {
if r.termNum == terminalNumMin {
return nil, fmt.Errorf("symbol table has no terminals")
}
return r.termTexts, nil
}
-func (r *symbolTableReader) nonTerminalSymbols() []symbol {
- syms := make([]symbol, 0, r.nonTermNum.Int()-nonTerminalNumMin.Int())
+func (r *SymbolTableReader) NonTerminalSymbols() []Symbol {
+ syms := make([]Symbol, 0, r.nonTermNum.Int()-nonTerminalNumMin.Int())
for sym := range r.sym2Text {
- if !sym.isNonTerminal() || sym.isNil() {
+ if !sym.isNonTerminal() || sym.IsNil() {
continue
}
syms = append(syms, sym)
@@ -287,8 +287,8 @@ func (r *symbolTableReader) nonTerminalSymbols() []symbol {
return syms
}
-func (r *symbolTableReader) nonTerminalTexts() ([]string, error) {
- if r.nonTermNum == nonTerminalNumMin || r.nonTermTexts[symbolStart.num().Int()] == "" {
+func (r *SymbolTableReader) NonTerminalTexts() ([]string, error) {
+ if r.nonTermNum == nonTerminalNumMin || r.nonTermTexts[symbolStart.Num().Int()] == "" {
return nil, fmt.Errorf("symbol table has no terminals or no start symbol")
}
return r.nonTermTexts, nil
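Aside: per the binary comments above, a Symbol packs three fields into 16 bits: the kind in the top bit (1 for terminal), the start/EOF flag in the next bit, and a 14-bit number (hence symbolNumMax = 0xffff >> 2). The mask constants themselves are not shown in this hunk, so the values below are inferred from the symbolStart and SymbolEOF layouts:

    package main

    import "fmt"

    // describe decodes the assumed layout: bit 15 = kind, bit 14 =
    // start/EOF flag, bits 0-13 = symbol number.
    func describe(s uint16) (kind string, startOrEOF bool, num uint16) {
        kind = "non-terminal"
        if s&0x8000 > 0 {
            kind = "terminal"
        }
        return kind, s&0x4000 > 0, s & 0x3fff
    }

    func main() {
        fmt.Println(describe(0xc001)) // SymbolEOF   (1100 ... 0001): terminal true 1
        fmt.Println(describe(0x4001)) // symbolStart (0100 ... 0001): non-terminal true 1
    }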
diff --git a/grammar/symbol_test.go b/grammar/symbol/symbol_test.go
index b9bcbdf..31c3edd 100644
--- a/grammar/symbol_test.go
+++ b/grammar/symbol/symbol_test.go
@@ -1,19 +1,19 @@
-package grammar
+package symbol
import "testing"
func TestSymbol(t *testing.T) {
- tab := newSymbolTable()
- w := tab.writer()
- _, _ = w.registerStartSymbol("expr'")
- _, _ = w.registerNonTerminalSymbol("expr")
- _, _ = w.registerNonTerminalSymbol("term")
- _, _ = w.registerNonTerminalSymbol("factor")
- _, _ = w.registerTerminalSymbol("id")
- _, _ = w.registerTerminalSymbol("add")
- _, _ = w.registerTerminalSymbol("mul")
- _, _ = w.registerTerminalSymbol("l_paren")
- _, _ = w.registerTerminalSymbol("r_paren")
+ tab := NewSymbolTable()
+ w := tab.Writer()
+ _, _ = w.RegisterStartSymbol("expr'")
+ _, _ = w.RegisterNonTerminalSymbol("expr")
+ _, _ = w.RegisterNonTerminalSymbol("term")
+ _, _ = w.RegisterNonTerminalSymbol("factor")
+ _, _ = w.RegisterTerminalSymbol("id")
+ _, _ = w.RegisterTerminalSymbol("add")
+ _, _ = w.RegisterTerminalSymbol("mul")
+ _, _ = w.RegisterTerminalSymbol("l_paren")
+ _, _ = w.RegisterTerminalSymbol("r_paren")
nonTermTexts := []string{
"", // Nil
@@ -81,13 +81,13 @@ func TestSymbol(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.text, func(t *testing.T) {
- r := tab.reader()
- sym, ok := r.toSymbol(tt.text)
+ r := tab.Reader()
+ sym, ok := r.ToSymbol(tt.text)
if !ok {
t.Fatalf("symbol was not found")
}
testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal)
- text, ok := r.toText(sym)
+ text, ok := r.ToText(sym)
if !ok {
t.Fatalf("text was not found")
}
@@ -98,16 +98,16 @@ func TestSymbol(t *testing.T) {
}
t.Run("EOF", func(t *testing.T) {
- testSymbolProperty(t, symbolEOF, false, false, true, false, true)
+ testSymbolProperty(t, SymbolEOF, false, false, true, false, true)
})
t.Run("Nil", func(t *testing.T) {
- testSymbolProperty(t, symbolNil, true, false, false, false, false)
+ testSymbolProperty(t, SymbolNil, true, false, false, false, false)
})
t.Run("texts of non-terminals", func(t *testing.T) {
- r := tab.reader()
- ts, err := r.nonTerminalTexts()
+ r := tab.Reader()
+ ts, err := r.NonTerminalTexts()
if err != nil {
t.Fatal(err)
}
@@ -122,8 +122,8 @@ func TestSymbol(t *testing.T) {
})
t.Run("texts of terminals", func(t *testing.T) {
- r := tab.reader()
- ts, err := r.terminalTexts()
+ r := tab.Reader()
+ ts, err := r.TerminalTexts()
if err != nil {
t.Fatal(err)
}
@@ -138,13 +138,13 @@ func TestSymbol(t *testing.T) {
})
}
-func testSymbolProperty(t *testing.T, sym symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) {
+func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) {
t.Helper()
- if v := sym.isNil(); v != isNil {
+ if v := sym.IsNil(); v != isNil {
t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v)
}
- if v := sym.isStart(); v != isStart {
+ if v := sym.IsStart(); v != isStart {
t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v)
}
if v := sym.isEOF(); v != isEOF {
@@ -153,7 +153,7 @@ func testSymbolProperty(t *testing.T, sym symbol, isNil, isStart, isEOF, isNonTe
if v := sym.isNonTerminal(); v != isNonTerminal {
t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v)
}
- if v := sym.isTerminal(); v != isTerminal {
+ if v := sym.IsTerminal(); v != isTerminal {
t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v)
}
}
diff --git a/grammar/test_helper_test.go b/grammar/test_helper_test.go
index 1dcdede..297a9a3 100644
--- a/grammar/test_helper_test.go
+++ b/grammar/test_helper_test.go
@@ -1,14 +1,18 @@
package grammar
-import "testing"
+import (
+ "testing"
-type testSymbolGenerator func(text string) symbol
+ "github.com/nihei9/vartan/grammar/symbol"
+)
-func newTestSymbolGenerator(t *testing.T, symTab *symbolTableReader) testSymbolGenerator {
- return func(text string) symbol {
+type testSymbolGenerator func(text string) symbol.Symbol
+
+func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
+ return func(text string) symbol.Symbol {
t.Helper()
- sym, ok := symTab.toSymbol(text)
+ sym, ok := symTab.ToSymbol(text)
if !ok {
t.Fatalf("symbol was not found: %v", text)
}
@@ -22,7 +26,7 @@ func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testPr
return func(lhs string, rhs ...string) *production {
t.Helper()
- rhsSym := []symbol{}
+ rhsSym := []symbol.Symbol{}
for _, text := range rhs {
rhsSym = append(rhsSym, genSym(text))
}
@@ -51,9 +55,9 @@ func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) test
}
}
-func withLookAhead(item *lrItem, lookAhead ...symbol) *lrItem {
+func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem {
if item.lookAhead.symbols == nil {
- item.lookAhead.symbols = map[symbol]struct{}{}
+ item.lookAhead.symbols = map[symbol.Symbol]struct{}{}
}
for _, a := range lookAhead {