aboutsummaryrefslogtreecommitdiff
path: root/grammar
diff options
context:
space:
mode:
Diffstat (limited to 'grammar')
-rw-r--r--grammar/first.go148
-rw-r--r--grammar/first_test.go219
-rw-r--r--grammar/grammar.go1390
-rw-r--r--grammar/grammar_test.go3381
-rw-r--r--grammar/item.go206
-rw-r--r--grammar/lalr1.go318
-rw-r--r--grammar/lalr1_test.go187
-rw-r--r--grammar/lexical/compiler.go413
-rw-r--r--grammar/lexical/compiler_test.go338
-rw-r--r--grammar/lexical/dfa/dfa.go173
-rw-r--r--grammar/lexical/dfa/dfa_test.go121
-rw-r--r--grammar/lexical/dfa/symbol_position.go182
-rw-r--r--grammar/lexical/dfa/symbol_position_test.go79
-rw-r--r--grammar/lexical/dfa/tree.go567
-rw-r--r--grammar/lexical/dfa/tree_test.go257
-rw-r--r--grammar/lexical/entry.go171
-rw-r--r--grammar/lexical/parser/error.go36
-rw-r--r--grammar/lexical/parser/fragment.go72
-rw-r--r--grammar/lexical/parser/lexer.go594
-rw-r--r--grammar/lexical/parser/lexer_test.go524
-rw-r--r--grammar/lexical/parser/parser.go531
-rw-r--r--grammar/lexical/parser/parser_test.go1389
-rw-r--r--grammar/lexical/parser/tree.go459
-rw-r--r--grammar/lr0.go197
-rw-r--r--grammar/lr0_test.go448
-rw-r--r--grammar/parsing_table.go553
-rw-r--r--grammar/parsing_table_test.go387
-rw-r--r--grammar/production.go117
-rw-r--r--grammar/semantic_error.go30
-rw-r--r--grammar/symbol/symbol.go295
-rw-r--r--grammar/symbol/symbol_test.go159
-rw-r--r--grammar/test_helper_test.go68
32 files changed, 0 insertions, 14009 deletions
diff --git a/grammar/first.go b/grammar/first.go
deleted file mode 100644
index 4f0bdcf..0000000
--- a/grammar/first.go
+++ /dev/null
@@ -1,148 +0,0 @@
-package grammar
-
-import (
- "fmt"
-
- "grammar/symbol"
-)
-
-type firstEntry struct {
- symbols map[symbol.Symbol]struct{}
- empty bool
-}
-
-func newFirstEntry() *firstEntry {
- return &firstEntry{
- symbols: map[symbol.Symbol]struct{}{},
- empty: false,
- }
-}
-
-func (e *firstEntry) add(sym symbol.Symbol) bool {
- if _, ok := e.symbols[sym]; ok {
- return false
- }
- e.symbols[sym] = struct{}{}
- return true
-}
-
-func (e *firstEntry) addEmpty() bool {
- if !e.empty {
- e.empty = true
- return true
- }
- return false
-}
-
-func (e *firstEntry) mergeExceptEmpty(target *firstEntry) bool {
- if target == nil {
- return false
- }
- changed := false
- for sym := range target.symbols {
- added := e.add(sym)
- if added {
- changed = true
- }
- }
- return changed
-}
-
-type firstSet struct {
- set map[symbol.Symbol]*firstEntry
-}
-
-func newFirstSet(prods *productionSet) *firstSet {
- fst := &firstSet{
- set: map[symbol.Symbol]*firstEntry{},
- }
- for _, prod := range prods.getAllProductions() {
- if _, ok := fst.set[prod.lhs]; ok {
- continue
- }
- fst.set[prod.lhs] = newFirstEntry()
- }
-
- return fst
-}
-
-func (fst *firstSet) find(prod *production, head int) (*firstEntry, error) {
- entry := newFirstEntry()
- if prod.rhsLen <= head {
- entry.addEmpty()
- return entry, nil
- }
- for _, sym := range prod.rhs[head:] {
- if sym.IsTerminal() {
- entry.add(sym)
- return entry, nil
- }
-
- e := fst.findBySymbol(sym)
- if e == nil {
- return nil, fmt.Errorf("an entry of FIRST was not found; symbol: %s", sym)
- }
- for s := range e.symbols {
- entry.add(s)
- }
- if !e.empty {
- return entry, nil
- }
- }
- entry.addEmpty()
- return entry, nil
-}
-
-func (fst *firstSet) findBySymbol(sym symbol.Symbol) *firstEntry {
- return fst.set[sym]
-}
-
-type firstComContext struct {
- first *firstSet
-}
-
-func newFirstComContext(prods *productionSet) *firstComContext {
- return &firstComContext{
- first: newFirstSet(prods),
- }
-}
-
-func genFirstSet(prods *productionSet) (*firstSet, error) {
- cc := newFirstComContext(prods)
- for {
- more := false
- for _, prod := range prods.getAllProductions() {
- e := cc.first.findBySymbol(prod.lhs)
- changed, err := genProdFirstEntry(cc, e, prod)
- if err != nil {
- return nil, err
- }
- if changed {
- more = true
- }
- }
- if !more {
- break
- }
- }
- return cc.first, nil
-}
-
-func genProdFirstEntry(cc *firstComContext, acc *firstEntry, prod *production) (bool, error) {
- if prod.isEmpty() {
- return acc.addEmpty(), nil
- }
-
- for _, sym := range prod.rhs {
- if sym.IsTerminal() {
- return acc.add(sym), nil
- }
-
- e := cc.first.findBySymbol(sym)
- changed := acc.mergeExceptEmpty(e)
- if !e.empty {
- return changed, nil
- }
- }
- return acc.addEmpty(), nil
-}
diff --git a/grammar/first_test.go b/grammar/first_test.go
deleted file mode 100644
index ea1a61c..0000000
--- a/grammar/first_test.go
+++ /dev/null
@@ -1,219 +0,0 @@
-package grammar
-
-import (
- "strings"
- "testing"
-
- "grammar/symbol"
- "spec/grammar/parser"
-)
-
-type first struct {
- lhs string
- num int
- dot int
- symbols []string
- empty bool
-}
-
-func TestGenFirst(t *testing.T) {
- tests := []struct {
- caption string
- src string
- first []first
- }{
- {
- caption: "productions contain only non-empty productions",
- src: `
-#name test;
-
-expr
- : expr add term
- | term
- ;
-term
- : term mul factor
- | factor
- ;
-factor
- : l_paren expr r_paren
- | id
- ;
-add: "\+";
-mul: "\*";
-l_paren: "\(";
-r_paren: "\)";
-id: "[A-Za-z_][0-9A-Za-z_]*";
-`,
- first: []first{
- {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}},
- {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
- {lhs: "expr", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}},
- {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}},
- {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}},
- {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}},
- {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}},
- {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}},
- {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}},
- },
- },
- {
- caption: "productions contain the empty start production",
- src: `
-#name test;
-
-s
- :
- ;
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true},
- {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true},
- },
- },
- {
- caption: "productions contain an empty production",
- src: `
-#name test;
-
-s
- : foo bar
- ;
-foo
- :
- ;
-bar: "bar";
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
- {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false},
- {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true},
- },
- },
- {
- caption: "a start production contains a non-empty alternative and empty alternative",
- src: `
-#name test;
-
-s
- : foo
- |
- ;
-foo: "foo";
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true},
- {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}},
- {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true},
- },
- },
- {
- caption: "a production contains non-empty alternative and empty alternative",
- src: `
-#name test;
-
-s
- : foo
- ;
-foo
- : bar
- |
- ;
-bar: "bar";
-`,
- first: []first{
- {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
- {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true},
- {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}},
- {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: true},
- },
- },
- }
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- fst, gram := genActualFirst(t, tt.src)
-
- for _, ttFirst := range tt.first {
- lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs)
- if !ok {
- t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs)
- }
-
- prod, ok := gram.productionSet.findByLHS(lhsSym)
- if !ok {
- t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym)
- }
-
- actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot)
- if err != nil {
- t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err)
- }
-
- expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable)
-
- testFirst(t, actualFirst, expectedFirst)
- }
- })
- }
-}
-
-func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err := b.build()
- if err != nil {
- t.Fatal(err)
- }
- fst, err := genFirstSet(gram.productionSet)
- if err != nil {
- t.Fatal(err)
- }
- if fst == nil {
- t.Fatal("genFiest returned nil without any error")
- }
-
- return fst, gram
-}
-
-func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry {
- t.Helper()
-
- entry := newFirstEntry()
- if empty {
- entry.addEmpty()
- }
- for _, sym := range symbols {
- symSym, ok := symTab.ToSymbol(sym)
- if !ok {
- t.Fatalf("a symbol was not found; symbol: %v", sym)
- }
- entry.add(symSym)
- }
-
- return entry
-}
-
-func testFirst(t *testing.T, actual, expected *firstEntry) {
- if actual.empty != expected.empty {
- t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty)
- }
-
- if len(actual.symbols) != len(expected.symbols) {
- t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
- }
-
- for eSym := range expected.symbols {
- if _, ok := actual.symbols[eSym]; !ok {
- t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols)
- }
- }
-}
diff --git a/grammar/grammar.go b/grammar/grammar.go
deleted file mode 100644
index 9fb2968..0000000
--- a/grammar/grammar.go
+++ /dev/null
@@ -1,1390 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "io"
- "strings"
-
- verr "error"
- "grammar/lexical"
- "grammar/symbol"
- spec "spec/grammar"
- "spec/grammar/parser"
-)
-
-type astActionEntry struct {
- position int
- expansion bool
-}
-
-type assocType string
-
-const (
- assocTypeNil = assocType("")
- assocTypeLeft = assocType("left")
- assocTypeRight = assocType("right")
-)
-
-const (
- precNil = 0
- precMin = 1
-)
-
-// precAndAssoc represents precedence and associativities of terminal symbols and productions.
-// We use the priority of the production to resolve shift/reduce conflicts.
-type precAndAssoc struct {
- // termPrec and termAssoc represent the precedence of the terminal symbols.
- termPrec map[symbol.SymbolNum]int
- termAssoc map[symbol.SymbolNum]assocType
-
- // prodPrec and prodAssoc represent the precedence and the associativities of the production.
- // These values are inherited from the right-most terminal symbols in the RHS of the productions.
- prodPrec map[productionNum]int
- prodAssoc map[productionNum]assocType
-}
-
-func (pa *precAndAssoc) terminalPrecedence(sym symbol.SymbolNum) int {
- prec, ok := pa.termPrec[sym]
- if !ok {
- return precNil
- }
-
- return prec
-}
-
-func (pa *precAndAssoc) terminalAssociativity(sym symbol.SymbolNum) assocType {
- assoc, ok := pa.termAssoc[sym]
- if !ok {
- return assocTypeNil
- }
-
- return assoc
-}
-
-func (pa *precAndAssoc) productionPredence(prod productionNum) int {
- prec, ok := pa.prodPrec[prod]
- if !ok {
- return precNil
- }
-
- return prec
-}
-
-func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType {
- assoc, ok := pa.prodAssoc[prod]
- if !ok {
- return assocTypeNil
- }
-
- return assoc
-}
-
-const reservedSymbolNameError = "error"
-
-type Grammar struct {
- name string
- lexSpec *lexical.LexSpec
- skipSymbols []symbol.Symbol
- productionSet *productionSet
- augmentedStartSymbol symbol.Symbol
- errorSymbol symbol.Symbol
- symbolTable *symbol.SymbolTableReader
- astActions map[productionID][]*astActionEntry
- precAndAssoc *precAndAssoc
-
- // recoverProductions is a set of productions having the recover directive.
- recoverProductions map[productionID]struct{}
-}
-
-type buildConfig struct {
- isReportingEnabled bool
-}
-
-type BuildOption func(config *buildConfig)
-
-func EnableReporting() BuildOption {
- return func(config *buildConfig) {
- config.isReportingEnabled = true
- }
-}
-
-type GrammarBuilder struct {
- AST *parser.RootNode
-
- errs verr.SpecErrors
-}
-
-func (b *GrammarBuilder) Build(opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) {
- gram, err := b.build()
- if err != nil {
- return nil, nil, err
- }
-
- return compile(gram, opts...)
-}
-
-func (b *GrammarBuilder) build() (*Grammar, error) {
- var specName string
- {
- errOccurred := false
- for _, dir := range b.AST.Directives {
- if dir.Name != "name" {
- continue
- }
-
- if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'name' takes just one ID parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
-
- errOccurred = true
- break
- }
-
- specName = dir.Parameters[0].ID
- break
- }
-
- if specName == "" && !errOccurred {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrNoGrammarName,
- })
- }
- }
-
- b.checkSpellingInconsistenciesOfUserDefinedIDs(b.AST)
- if len(b.errs) > 0 {
- return nil, b.errs
- }
-
- symTab, ss, err := b.genSymbolTable(b.AST)
- if err != nil {
- return nil, err
- }
-
- lexSpec, skip, err := b.genLexSpecAndSkipSymbols(symTab.Reader(), b.AST)
- if err != nil {
- return nil, err
- }
-
- prodsAndActs, err := b.genProductionsAndActions(b.AST, symTab.Reader(), ss.errSym, ss.augStartSym, ss.startSym)
- if err != nil {
- return nil, err
- }
- if prodsAndActs == nil && len(b.errs) > 0 {
- return nil, b.errs
- }
-
- pa, err := b.genPrecAndAssoc(symTab.Reader(), ss.errSym, prodsAndActs)
- if err != nil {
- return nil, err
- }
- if pa == nil && len(b.errs) > 0 {
- return nil, b.errs
- }
-
- syms := findUsedAndUnusedSymbols(b.AST)
- if syms == nil && len(b.errs) > 0 {
- return nil, b.errs
- }
-
- // When a terminal symbol that cannot be reached from the start symbol has the skip directive,
- // the compiler treats its terminal as a used symbol, not unused.
- {
- r := symTab.Reader()
- for _, sym := range skip {
- s, _ := r.ToText(sym)
- if _, ok := syms.unusedTerminals[s]; !ok {
- prod := syms.usedTerminals[s]
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrTermCannotBeSkipped,
- Detail: s,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- continue
- }
-
- delete(syms.unusedTerminals, s)
- }
- }
-
- for sym, prod := range syms.unusedProductions {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrUnusedProduction,
- Detail: sym,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- }
-
- for sym, prod := range syms.unusedTerminals {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrUnusedTerminal,
- Detail: sym,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- }
-
- if len(b.errs) > 0 {
- return nil, b.errs
- }
-
- return &Grammar{
- name: specName,
- lexSpec: lexSpec,
- skipSymbols: skip,
- productionSet: prodsAndActs.prods,
- augmentedStartSymbol: prodsAndActs.augStartSym,
- errorSymbol: ss.errSym,
- symbolTable: symTab.Reader(),
- astActions: prodsAndActs.astActs,
- recoverProductions: prodsAndActs.recoverProds,
- precAndAssoc: pa,
- }, nil
-}
-
-type usedAndUnusedSymbols struct {
- unusedProductions map[string]*parser.ProductionNode
- unusedTerminals map[string]*parser.ProductionNode
- usedTerminals map[string]*parser.ProductionNode
-}
-
-func findUsedAndUnusedSymbols(root *parser.RootNode) *usedAndUnusedSymbols {
- prods := map[string]*parser.ProductionNode{}
- lexProds := map[string]*parser.ProductionNode{}
- mark := map[string]bool{}
- {
- for _, p := range root.Productions {
- prods[p.LHS] = p
- mark[p.LHS] = false
- for _, alt := range p.RHS {
- for _, e := range alt.Elements {
- if e.ID == "" {
- continue
- }
- mark[e.ID] = false
- }
- }
- }
-
- for _, p := range root.LexProductions {
- lexProds[p.LHS] = p
- mark[p.LHS] = false
- }
-
- start := root.Productions[0]
- mark[start.LHS] = true
- markUsedSymbols(mark, map[string]bool{}, prods, start)
-
- // We don't have to check the error symbol because the error symbol doesn't have a production.
- delete(mark, reservedSymbolNameError)
- }
-
- usedTerms := make(map[string]*parser.ProductionNode, len(lexProds))
- unusedProds := map[string]*parser.ProductionNode{}
- unusedTerms := map[string]*parser.ProductionNode{}
- for sym, used := range mark {
- if p, ok := prods[sym]; ok {
- if used {
- continue
- }
- unusedProds[sym] = p
- continue
- }
- if p, ok := lexProds[sym]; ok {
- if used {
- usedTerms[sym] = p
- } else {
- unusedTerms[sym] = p
- }
- continue
- }
-
- // May be reached here when a fragment name appears on the right-hand side of a production rule. However, an error
- // to the effect that a production rule cannot contain a fragment will be detected in a subsequent process. So we can
- // ignore it here.
- }
-
- return &usedAndUnusedSymbols{
- usedTerminals: usedTerms,
- unusedProductions: unusedProds,
- unusedTerminals: unusedTerms,
- }
-}
-
-func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[string]*parser.ProductionNode, prod *parser.ProductionNode) {
- if marked[prod.LHS] {
- return
- }
-
- for _, alt := range prod.RHS {
- for _, e := range alt.Elements {
- if e.ID == "" {
- continue
- }
-
- mark[e.ID] = true
-
- p, ok := prods[e.ID]
- if !ok {
- continue
- }
-
- // Remove a production to avoid inifinite recursion.
- marked[prod.LHS] = true
-
- markUsedSymbols(mark, marked, prods, p)
- }
- }
-}
-
-func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *parser.RootNode) {
- var ids []string
- {
- for _, prod := range root.Productions {
- ids = append(ids, prod.LHS)
- for _, alt := range prod.RHS {
- for _, elem := range alt.Elements {
- if elem.Label != nil {
- ids = append(ids, elem.Label.Name)
- }
- }
- }
- }
- for _, prod := range root.LexProductions {
- ids = append(ids, prod.LHS)
- }
- for _, dir := range root.Directives {
- dirIDs := collectUserDefinedIDsFromDirective(dir)
- if len(dirIDs) > 0 {
- ids = append(ids, dirIDs...)
- }
- }
- }
-
- duplicated := lexical.FindSpellingInconsistencies(ids)
- if len(duplicated) == 0 {
- return
- }
-
- for _, dup := range duplicated {
- var s string
- {
- var b strings.Builder
- fmt.Fprintf(&b, "%+v", dup[0])
- for _, id := range dup[1:] {
- fmt.Fprintf(&b, ", %+v", id)
- }
- s = b.String()
- }
-
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrSpellingInconsistency,
- Detail: s,
- })
- }
-}
-
-func collectUserDefinedIDsFromDirective(dir *parser.DirectiveNode) []string {
- var ids []string
- for _, param := range dir.Parameters {
- if param.Group != nil {
- for _, d := range param.Group {
- dIDs := collectUserDefinedIDsFromDirective(d)
- if len(dIDs) > 0 {
- ids = append(ids, dIDs...)
- }
- }
- }
- if param.OrderedSymbol != "" {
- ids = append(ids, param.OrderedSymbol)
- }
- }
- return ids
-}
-
-type symbols struct {
- errSym symbol.Symbol
- augStartSym symbol.Symbol
- startSym symbol.Symbol
-}
-
-func (b *GrammarBuilder) genSymbolTable(root *parser.RootNode) (*symbol.SymbolTable, *symbols, error) {
- symTab := symbol.NewSymbolTable()
- w := symTab.Writer()
- r := symTab.Reader()
-
- // We need to register the reserved symbol before registering others.
- var errSym symbol.Symbol
- {
- sym, err := w.RegisterTerminalSymbol(reservedSymbolNameError)
- if err != nil {
- return nil, nil, err
- }
- errSym = sym
- }
-
- for _, prod := range root.LexProductions {
- if sym, exist := r.ToSymbol(prod.LHS); exist {
- if sym == errSym {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrErrSymIsReserved,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- } else {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateTerminal,
- Detail: prod.LHS,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- }
-
- continue
- }
-
- _, err := w.RegisterTerminalSymbol(prod.LHS)
- if err != nil {
- return nil, nil, err
- }
- }
-
- startProd := root.Productions[0]
- augStartText := fmt.Sprintf("%s'", startProd.LHS)
- var err error
- augStartSym, err := w.RegisterStartSymbol(augStartText)
- if err != nil {
- return nil, nil, err
- }
- if augStartSym == errSym {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrErrSymIsReserved,
- Row: startProd.Pos.Row,
- Col: startProd.Pos.Col,
- })
- }
-
- startSym, err := w.RegisterNonTerminalSymbol(startProd.LHS)
- if err != nil {
- return nil, nil, err
- }
- if startSym == errSym {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrErrSymIsReserved,
- Row: startProd.Pos.Row,
- Col: startProd.Pos.Col,
- })
- }
-
- for _, prod := range root.Productions {
- sym, err := w.RegisterNonTerminalSymbol(prod.LHS)
- if err != nil {
- return nil, nil, err
- }
- if sym.IsTerminal() {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateName,
- Detail: prod.LHS,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- }
- if sym == errSym {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrErrSymIsReserved,
- Row: prod.Pos.Row,
- Col: prod.Pos.Col,
- })
- }
- }
-
- return symTab, &symbols{
- errSym: errSym,
- augStartSym: augStartSym,
- startSym: startSym,
- }, nil
-}
-
-func (b *GrammarBuilder) genLexSpecAndSkipSymbols(symTab *symbol.SymbolTableReader, root *parser.RootNode) (*lexical.LexSpec, []symbol.Symbol, error) {
- entries := []*lexical.LexEntry{}
- skipSyms := []symbol.Symbol{}
- for _, prod := range root.LexProductions {
- entry, skip, specErr, err := genLexEntry(prod)
- if err != nil {
- return nil, nil, err
- }
- if specErr != nil {
- b.errs = append(b.errs, specErr)
- continue
- }
- if skip {
- sym, _ := symTab.ToSymbol(prod.LHS)
- skipSyms = append(skipSyms, sym)
- }
- entries = append(entries, entry)
- }
-
- checkedFragments := map[string]struct{}{}
- for _, fragment := range root.Fragments {
- if _, exist := checkedFragments[fragment.LHS]; exist {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateFragment,
- Detail: fragment.LHS,
- Row: fragment.Pos.Row,
- Col: fragment.Pos.Col,
- })
- continue
- }
- checkedFragments[fragment.LHS] = struct{}{}
-
- entries = append(entries, &lexical.LexEntry{
- Fragment: true,
- Kind: spec.LexKindName(fragment.LHS),
- Pattern: fragment.RHS,
- })
- }
-
- return &lexical.LexSpec{
- Entries: entries,
- }, skipSyms, nil
-}
-
-func genLexEntry(prod *parser.ProductionNode) (*lexical.LexEntry, bool, *verr.SpecError, error) {
- alt := prod.RHS[0]
- elem := alt.Elements[0]
-
- var pattern string
- if elem.Literally {
- pattern = spec.EscapePattern(elem.Pattern)
- } else {
- pattern = elem.Pattern
- }
-
- var modes []spec.LexModeName
- var skip bool
- var push spec.LexModeName
- var pop bool
- dirConsumed := map[string]struct{}{}
- for _, dir := range prod.Directives {
- if _, consumed := dirConsumed[dir.Name]; consumed {
- return nil, false, &verr.SpecError{
- Cause: semErrDuplicateDir,
- Detail: dir.Name,
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- }, nil
- }
- dirConsumed[dir.Name] = struct{}{}
-
- switch dir.Name {
- case "mode":
- if len(dir.Parameters) == 0 {
- return nil, false, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'mode' directive needs an ID parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- }, nil
- }
- for _, param := range dir.Parameters {
- if param.ID == "" {
- return nil, false, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'mode' directive needs an ID parameter",
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- }, nil
- }
- modes = append(modes, spec.LexModeName(param.ID))
- }
- case "skip":
- if len(dir.Parameters) > 0 {
- return nil, false, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'skip' directive needs no parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- }, nil
- }
- skip = true
- case "push":
- if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" {
- return nil, false, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'push' directive needs an ID parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- }, nil
- }
- push = spec.LexModeName(dir.Parameters[0].ID)
- case "pop":
- if len(dir.Parameters) > 0 {
- return nil, false, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'pop' directive needs no parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- }, nil
- }
- pop = true
- default:
- return nil, false, &verr.SpecError{
- Cause: semErrDirInvalidName,
- Detail: dir.Name,
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- }, nil
- }
- }
-
- if len(alt.Directives) > 0 {
- return nil, false, &verr.SpecError{
- Cause: semErrInvalidAltDir,
- Detail: "a lexical production cannot have alternative directives",
- Row: alt.Directives[0].Pos.Row,
- Col: alt.Directives[0].Pos.Col,
- }, nil
- }
-
- return &lexical.LexEntry{
- Modes: modes,
- Kind: spec.LexKindName(prod.LHS),
- Pattern: pattern,
- Push: push,
- Pop: pop,
- }, skip, nil, nil
-}
-
-type productionsAndActions struct {
- prods *productionSet
- augStartSym symbol.Symbol
- astActs map[productionID][]*astActionEntry
- prodPrecsTerm map[productionID]symbol.Symbol
- prodPrecsOrdSym map[productionID]string
- prodPrecPoss map[productionID]*parser.Position
- recoverProds map[productionID]struct{}
-}
-
-func (b *GrammarBuilder) genProductionsAndActions(root *parser.RootNode, symTab *symbol.SymbolTableReader, errSym symbol.Symbol, augStartSym symbol.Symbol, startSym symbol.Symbol) (*productionsAndActions, error) {
- if len(root.Productions) == 0 {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrNoProduction,
- })
- return nil, nil
- }
-
- prods := newProductionSet()
- astActs := map[productionID][]*astActionEntry{}
- prodPrecsTerm := map[productionID]symbol.Symbol{}
- prodPrecsOrdSym := map[productionID]string{}
- prodPrecPoss := map[productionID]*parser.Position{}
- recoverProds := map[productionID]struct{}{}
-
- p, err := newProduction(augStartSym, []symbol.Symbol{
- startSym,
- })
- if err != nil {
- return nil, err
- }
-
- prods.append(p)
-
- for _, prod := range root.Productions {
- lhsSym, ok := symTab.ToSymbol(prod.LHS)
- if !ok {
- // All symbols are assumed to be pre-detected, so it's a bug if we cannot find them here.
- return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS)
- }
-
- if len(prod.Directives) > 0 {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrInvalidProdDir,
- Detail: "a production cannot have production directives",
- Row: prod.Directives[0].Pos.Row,
- Col: prod.Directives[0].Pos.Col,
- })
- continue
- }
-
- LOOP_RHS:
- for _, alt := range prod.RHS {
- altSyms := make([]symbol.Symbol, len(alt.Elements))
- offsets := map[string]int{}
- ambiguousIDOffsets := map[string]struct{}{}
- for i, elem := range alt.Elements {
- sym, ok := symTab.ToSymbol(elem.ID)
- if !ok {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrUndefinedSym,
- Detail: elem.ID,
- Row: elem.Pos.Row,
- Col: elem.Pos.Col,
- })
- continue LOOP_RHS
- }
- altSyms[i] = sym
-
- if elem.Label != nil {
- if _, added := offsets[elem.Label.Name]; added {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateLabel,
- Detail: elem.Label.Name,
- Row: elem.Label.Pos.Row,
- Col: elem.Label.Pos.Col,
- })
- continue LOOP_RHS
- }
- if _, found := symTab.ToSymbol(elem.Label.Name); found {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrInvalidLabel,
- Detail: elem.Label.Name,
- Row: elem.Label.Pos.Row,
- Col: elem.Label.Pos.Col,
- })
- continue LOOP_RHS
- }
- offsets[elem.Label.Name] = i
- }
- // A symbol having a label can be specified by both the label and the symbol name.
- // So record the symbol's position, whether or not it has a label.
- if elem.ID != "" {
- if _, exist := offsets[elem.ID]; exist {
- // When the same symbol appears multiple times in an alternative, the symbol is ambiguous. When we need
- // to specify the symbol in a directive, we cannot use the name of the ambiguous symbol. Instead, specify
- // a label to resolve the ambiguity.
- delete(offsets, elem.ID)
- ambiguousIDOffsets[elem.ID] = struct{}{}
- } else {
- offsets[elem.ID] = i
- }
- }
- }
-
- p, err := newProduction(lhsSym, altSyms)
- if err != nil {
- return nil, err
- }
- if _, exist := prods.findByID(p.id); exist {
- // Report the line number of a duplicate alternative.
- // When the alternative is empty, we report the position of its LHS.
- var row int
- var col int
- if len(alt.Elements) > 0 {
- row = alt.Elements[0].Pos.Row
- col = alt.Elements[0].Pos.Col
- } else {
- row = prod.Pos.Row
- col = prod.Pos.Col
- }
-
- var detail string
- {
- var b strings.Builder
- fmt.Fprintf(&b, "%v →", prod.LHS)
- for _, elem := range alt.Elements {
- switch {
- case elem.ID != "":
- fmt.Fprintf(&b, " %v", elem.ID)
- case elem.Pattern != "":
- fmt.Fprintf(&b, ` "%v"`, elem.Pattern)
- }
- }
- if len(alt.Elements) == 0 {
- fmt.Fprintf(&b, " ε")
- }
-
- detail = b.String()
- }
-
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateProduction,
- Detail: detail,
- Row: row,
- Col: col,
- })
- continue LOOP_RHS
- }
- prods.append(p)
-
- dirConsumed := map[string]struct{}{}
- for _, dir := range alt.Directives {
- if _, consumed := dirConsumed[dir.Name]; consumed {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateDir,
- Detail: dir.Name,
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- }
- dirConsumed[dir.Name] = struct{}{}
-
- switch dir.Name {
- case "ast":
- if len(dir.Parameters) == 0 {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'ast' directive needs at least one parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue LOOP_RHS
- }
- astAct := make([]*astActionEntry, len(dir.Parameters))
- consumedOffsets := map[int]struct{}{}
- for i, param := range dir.Parameters {
- if param.ID == "" {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'ast' directive can take only ID parameters",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue LOOP_RHS
- }
-
- if _, ambiguous := ambiguousIDOffsets[param.ID]; ambiguous {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrAmbiguousElem,
- Detail: fmt.Sprintf("'%v' is ambiguous", param.ID),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
-
- offset, ok := offsets[param.ID]
- if !ok {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("a symbol was not found in an alternative: %v", param.ID),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
- if _, consumed := consumedOffsets[offset]; consumed {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateElem,
- Detail: param.ID,
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
- consumedOffsets[offset] = struct{}{}
-
- if param.Expansion {
- elem := alt.Elements[offset]
- if elem.Pattern != "" {
- // Currently, it is a bug to reach here because it is
- // forbidden to have anything other than ID appear in
- // production rules.
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("the expansion symbol cannot be applied to a pattern (%v: \"%v\")", param.ID, elem.Pattern),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
- elemSym, ok := symTab.ToSymbol(elem.ID)
- if !ok {
- // If the symbol was not found, it's a bug.
- return nil, fmt.Errorf("a symbol corresponding to an ID (%v) was not found", elem.ID)
- }
- if elemSym.IsTerminal() {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("the expansion symbol cannot be applied to a terminal symbol (%v: %v)", param.ID, elem.ID),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
- }
-
- astAct[i] = &astActionEntry{
- position: offset + 1,
- expansion: param.Expansion,
- }
- }
- astActs[p.id] = astAct
- case "prec":
- if len(dir.Parameters) != 1 || (dir.Parameters[0].ID == "" && dir.Parameters[0].OrderedSymbol == "") {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'prec' directive needs just one ID parameter or ordered symbol",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue LOOP_RHS
- }
- param := dir.Parameters[0]
- switch {
- case param.ID != "":
- sym, ok := symTab.ToSymbol(param.ID)
- if !ok {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("unknown terminal symbol: %v", param.ID),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
- if sym == errSym {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- }
- if !sym.IsTerminal() {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("the symbol must be a terminal: %v", param.ID),
- Row: param.Pos.Row,
- Col: param.Pos.Col,
- })
- continue LOOP_RHS
- }
- prodPrecsTerm[p.id] = sym
- prodPrecPoss[p.id] = &param.Pos
- case param.OrderedSymbol != "":
- prodPrecsOrdSym[p.id] = param.OrderedSymbol
- prodPrecPoss[p.id] = &param.Pos
- }
- case "recover":
- if len(dir.Parameters) > 0 {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'recover' directive needs no parameter",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue LOOP_RHS
- }
- recoverProds[p.id] = struct{}{}
- default:
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidName,
- Detail: fmt.Sprintf("invalid directive name '%v'", dir.Name),
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue LOOP_RHS
- }
- }
- }
- }
-
- return &productionsAndActions{
- prods: prods,
- augStartSym: augStartSym,
- astActs: astActs,
- prodPrecsTerm: prodPrecsTerm,
- prodPrecsOrdSym: prodPrecsOrdSym,
- prodPrecPoss: prodPrecPoss,
- recoverProds: recoverProds,
- }, nil
-}
-
-func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbol.SymbolTableReader, errSym symbol.Symbol, prodsAndActs *productionsAndActions) (*precAndAssoc, error) {
- termPrec := map[symbol.SymbolNum]int{}
- termAssoc := map[symbol.SymbolNum]assocType{}
- ordSymPrec := map[string]int{}
- {
- var precGroup []*parser.DirectiveNode
- for _, dir := range b.AST.Directives {
- if dir.Name == "prec" {
- if dir.Parameters == nil || len(dir.Parameters) != 1 || dir.Parameters[0].Group == nil {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "'prec' needs just one directive group",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue
- }
- precGroup = dir.Parameters[0].Group
- continue
- }
-
- if dir.Name != "name" && dir.Name != "prec" {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidName,
- Detail: dir.Name,
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- continue
- }
- }
-
- precN := precMin
- for _, dir := range precGroup {
- var assocTy assocType
- switch dir.Name {
- case "left":
- assocTy = assocTypeLeft
- case "right":
- assocTy = assocTypeRight
- case "assign":
- assocTy = assocTypeNil
- default:
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidName,
- Detail: dir.Name,
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- return nil, nil
- }
-
- if len(dir.Parameters) == 0 {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "associativity needs at least one symbol",
- Row: dir.Pos.Row,
- Col: dir.Pos.Col,
- })
- return nil, nil
- }
- ASSOC_PARAM_LOOP:
- for _, p := range dir.Parameters {
- switch {
- case p.ID != "":
- sym, ok := symTab.ToSymbol(p.ID)
- if !ok {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("'%v' is undefined", p.ID),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- return nil, nil
- }
- if sym == errSym {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- return nil, nil
- }
- if !sym.IsTerminal() {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: fmt.Sprintf("associativity can take only terminal symbol ('%v' is a non-terminal)", p.ID),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- return nil, nil
- }
- if prec, alreadySet := termPrec[sym.Num()]; alreadySet {
- if prec == precN {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateAssoc,
- Detail: fmt.Sprintf("'%v' already has the same associativity and precedence", p.ID),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- } else if assoc := termAssoc[sym.Num()]; assoc == assocTy {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateAssoc,
- Detail: fmt.Sprintf("'%v' already has different precedence", p.ID),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- } else {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateAssoc,
- Detail: fmt.Sprintf("'%v' already has different associativity and precedence", p.ID),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- }
- break ASSOC_PARAM_LOOP
- }
-
- termPrec[sym.Num()] = precN
- termAssoc[sym.Num()] = assocTy
- case p.OrderedSymbol != "":
- if prec, alreadySet := ordSymPrec[p.OrderedSymbol]; alreadySet {
- if prec == precN {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateAssoc,
- Detail: fmt.Sprintf("'$%v' already has the same precedence", p.OrderedSymbol),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- } else {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDuplicateAssoc,
- Detail: fmt.Sprintf("'$%v' already has different precedence", p.OrderedSymbol),
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- }
- break ASSOC_PARAM_LOOP
- }
-
- ordSymPrec[p.OrderedSymbol] = precN
- default:
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrDirInvalidParam,
- Detail: "a parameter must be an ID or an ordered symbol",
- Row: p.Pos.Row,
- Col: p.Pos.Col,
- })
- return nil, nil
- }
- }
-
- precN++
- }
- }
- if len(b.errs) > 0 {
- return nil, nil
- }
-
- prodPrec := map[productionNum]int{}
- prodAssoc := map[productionNum]assocType{}
- for _, prod := range prodsAndActs.prods.getAllProductions() {
- // A #prec directive changes only precedence, not associativity.
- if term, ok := prodsAndActs.prodPrecsTerm[prod.id]; ok {
- if prec, ok := termPrec[term.Num()]; ok {
- prodPrec[prod.num] = prec
- prodAssoc[prod.num] = assocTypeNil
- } else {
- text, _ := symTab.ToText(term)
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrUndefinedPrec,
- Detail: text,
- Row: prodsAndActs.prodPrecPoss[prod.id].Row,
- Col: prodsAndActs.prodPrecPoss[prod.id].Col,
- })
- }
- } else if ordSym, ok := prodsAndActs.prodPrecsOrdSym[prod.id]; ok {
- if prec, ok := ordSymPrec[ordSym]; ok {
- prodPrec[prod.num] = prec
- prodAssoc[prod.num] = assocTypeNil
- } else {
- b.errs = append(b.errs, &verr.SpecError{
- Cause: semErrUndefinedOrdSym,
- Detail: fmt.Sprintf("$%v", ordSym),
- Row: prodsAndActs.prodPrecPoss[prod.id].Row,
- Col: prodsAndActs.prodPrecPoss[prod.id].Col,
- })
- }
- } else {
- // A production inherits precedence and associativity from the right-most terminal symbol.
- mostrightTerm := symbol.SymbolNil
- for _, sym := range prod.rhs {
- if !sym.IsTerminal() {
- continue
- }
- mostrightTerm = sym
- }
- if !mostrightTerm.IsNil() {
- prodPrec[prod.num] = termPrec[mostrightTerm.Num()]
- prodAssoc[prod.num] = termAssoc[mostrightTerm.Num()]
- }
- }
- }
- if len(b.errs) > 0 {
- return nil, nil
- }
-
- return &precAndAssoc{
- termPrec: termPrec,
- termAssoc: termAssoc,
- prodPrec: prodPrec,
- prodAssoc: prodAssoc,
- }, nil
-}
-
-func compile(gram *Grammar, opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) {
- config := &buildConfig{}
- for _, opt := range opts {
- opt(config)
- }
-
- lexSpec, err, cErrs := lexical.Compile(gram.lexSpec, lexical.CompressionLevelMax)
- if err != nil {
- if len(cErrs) > 0 {
- var b strings.Builder
- writeCompileError(&b, cErrs[0])
- for _, cerr := range cErrs[1:] {
- fmt.Fprintf(&b, "\n")
- writeCompileError(&b, cerr)
- }
- return nil, nil, fmt.Errorf(b.String())
- }
- return nil, nil, err
- }
-
- kind2Term := make([]int, len(lexSpec.KindNames))
- for i, k := range lexSpec.KindNames {
- if k == spec.LexKindNameNil {
- kind2Term[spec.LexKindIDNil] = symbol.SymbolNil.Num().Int()
- continue
- }
-
- sym, ok := gram.symbolTable.ToSymbol(k.String())
- if !ok {
- return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k)
- }
- kind2Term[i] = sym.Num().Int()
- }
-
- termTexts, err := gram.symbolTable.TerminalTexts()
- if err != nil {
- return nil, nil, err
- }
-
- var termSkip []int
- {
- r := gram.symbolTable.Reader()
- // I want to use gram.symbolTable.terminalSymbols() here instead of gram.symbolTable.terminalTexts(),
- // but gram.symbolTable.terminalSymbols() is different in length from terminalTexts
- // because it does not contain a predefined symbol, like EOF.
- // Therefore, we use terminalTexts, although it takes more time to lookup for symbols.
- termSkip = make([]int, len(termTexts))
- for _, t := range termTexts {
- s, _ := r.ToSymbol(t)
- for _, sk := range gram.skipSymbols {
- if s != sk {
- continue
- }
- termSkip[s.Num()] = 1
- break
- }
- }
- }
-
- nonTerms, err := gram.symbolTable.NonTerminalTexts()
- if err != nil {
- return nil, nil, err
- }
-
- firstSet, err := genFirstSet(gram.productionSet)
- if err != nil {
- return nil, nil, err
- }
-
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- return nil, nil, err
- }
-
- var tab *ParsingTable
- var report *spec.Report
- {
- lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet)
- if err != nil {
- return nil, nil, err
- }
-
- b := &lrTableBuilder{
- automaton: lalr1.lr0Automaton,
- prods: gram.productionSet,
- termCount: len(termTexts),
- nonTermCount: len(nonTerms),
- symTab: gram.symbolTable,
- precAndAssoc: gram.precAndAssoc,
- }
- tab, err = b.build()
- if err != nil {
- return nil, nil, err
- }
-
- if config.isReportingEnabled {
- report, err = b.genReport(tab, gram)
- if err != nil {
- return nil, nil, err
- }
- }
- }
-
- action := make([]int, len(tab.actionTable))
- for i, e := range tab.actionTable {
- action[i] = int(e)
- }
- goTo := make([]int, len(tab.goToTable))
- for i, e := range tab.goToTable {
- goTo[i] = int(e)
- }
-
- lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1)
- altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1)
- recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1)
- astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1)
- for _, p := range gram.productionSet.getAllProductions() {
- lhsSyms[p.num] = p.lhs.Num().Int()
- altSymCounts[p.num] = p.rhsLen
-
- if _, ok := gram.recoverProductions[p.id]; ok {
- recoverProds[p.num] = 1
- }
-
- astAct, ok := gram.astActions[p.id]
- if !ok {
- continue
- }
- astActEntry := make([]int, len(astAct))
- for i, e := range astAct {
- if e.expansion {
- astActEntry[i] = e.position * -1
- } else {
- astActEntry[i] = e.position
- }
- }
- astActEnties[p.num] = astActEntry
- }
-
- return &spec.CompiledGrammar{
- Name: gram.name,
- Lexical: lexSpec,
- Syntactic: &spec.SyntacticSpec{
- Action: action,
- GoTo: goTo,
- StateCount: tab.stateCount,
- InitialState: tab.InitialState.Int(),
- StartProduction: productionNumStart.Int(),
- LHSSymbols: lhsSyms,
- AlternativeSymbolCounts: altSymCounts,
- Terminals: termTexts,
- TerminalCount: tab.terminalCount,
- TerminalSkip: termSkip,
- KindToTerminal: kind2Term,
- NonTerminals: nonTerms,
- NonTerminalCount: tab.nonTerminalCount,
- EOFSymbol: symbol.SymbolEOF.Num().Int(),
- ErrorSymbol: gram.errorSymbol.Num().Int(),
- ErrorTrapperStates: tab.errorTrapperStates,
- RecoverProductions: recoverProds,
- },
- ASTAction: &spec.ASTAction{
- Entries: astActEnties,
- },
- }, report, nil
-}
-
-func writeCompileError(w io.Writer, cErr *lexical.CompileError) {
- if cErr.Fragment {
- fmt.Fprintf(w, "fragment ")
- }
- fmt.Fprintf(w, "%v: %v", cErr.Kind, cErr.Cause)
- if cErr.Detail != "" {
- fmt.Fprintf(w, ": %v", cErr.Detail)
- }
-}
diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go
deleted file mode 100644
index 4a3c6a0..0000000
--- a/grammar/grammar_test.go
+++ /dev/null
@@ -1,3381 +0,0 @@
-package grammar
-
-import (
- "strings"
- "testing"
-
- verr "error"
- "spec/grammar/parser"
-)
-
-func TestGrammarBuilderOK(t *testing.T) {
- type okTest struct {
- caption string
- specSrc string
- validate func(t *testing.T, g *Grammar)
- }
-
- nameTests := []*okTest{
- {
- caption: "the `#name` can be the same identifier as a non-terminal symbol",
- specSrc: `
-#name s;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- validate: func(t *testing.T, g *Grammar) {
- expected := "s"
- if g.name != expected {
- t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
- }
- },
- },
- {
- caption: "the `#name` can be the same identifier as a terminal symbol",
- specSrc: `
-#name foo;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- validate: func(t *testing.T, g *Grammar) {
- expected := "foo"
- if g.name != expected {
- t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
- }
- },
- },
- {
- caption: "the `#name` can be the same identifier as the error symbol",
- specSrc: `
-#name error;
-
-s
- : foo
- | error
- ;
-
-foo
- : 'foo';
-`,
- validate: func(t *testing.T, g *Grammar) {
- expected := "error"
- if g.name != expected {
- t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
- }
- },
- },
- {
- caption: "the `#name` can be the same identifier as a fragment",
- specSrc: `
-#name f;
-
-s
- : foo
- ;
-
-foo
- : "\f{f}";
-fragment f
- : 'foo';
-`,
- validate: func(t *testing.T, g *Grammar) {
- expected := "f"
- if g.name != expected {
- t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name)
- }
- },
- },
- }
-
- modeTests := []*okTest{
- {
- caption: "a `#mode` can be the same identifier as a non-terminal symbol",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push s
- : 'foo';
-bar #mode s
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- kind := "bar"
- expectedMode := "s"
- for _, e := range g.lexSpec.Entries {
- if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
- return
- }
- }
- t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
- },
- },
- {
- caption: "a `#mode` can be the same identifier as a terminal symbol",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push bar
- : 'foo';
-bar #mode bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- kind := "bar"
- expectedMode := "bar"
- for _, e := range g.lexSpec.Entries {
- if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
- return
- }
- }
- t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
- },
- },
- {
- caption: "a `#mode` can be the same identifier as the error symbol",
- specSrc: `
-#name test;
-
-s
- : foo bar
- | error
- ;
-
-foo #push error
- : 'foo';
-bar #mode error
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- kind := "bar"
- expectedMode := "error"
- for _, e := range g.lexSpec.Entries {
- if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
- return
- }
- }
- t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
- },
- },
- {
- caption: "a `#mode` can be the same identifier as a fragment",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push f
- : "\f{f}";
-bar #mode f
- : 'bar';
-fragment f
- : 'foo';
-`,
- validate: func(t *testing.T, g *Grammar) {
- kind := "bar"
- expectedMode := "f"
- for _, e := range g.lexSpec.Entries {
- if e.Kind.String() == kind && e.Modes[0].String() == expectedMode {
- return
- }
- }
- t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode)
- },
- },
- }
-
- precTests := []*okTest{
- {
- caption: "a `#prec` allows the empty directive group",
- specSrc: `
-#name test;
-
-#prec ();
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- },
- {
- caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols",
- specSrc: `
-#name test;
-
-#prec (
- #left foo bar
-);
-
-s
- : foo bar baz
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-baz
- : 'baz';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 1 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 1 || barAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
- }
- var bazPrec int
- var bazAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("baz")
- bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if bazPrec != precNil || bazAssoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
- }
- },
- },
- {
- caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols",
- specSrc: `
-#name test;
-
-#prec (
- #right foo bar
-);
-
-s
- : foo bar baz
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-baz
- : 'baz';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 1 || fooAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc)
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 1 || barAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc)
- }
- var bazPrec int
- var bazAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("baz")
- bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if bazPrec != precNil || bazAssoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
- }
- },
- },
- {
- caption: "an `#assign` directive gives only a precedence to specified terminal symbols",
- specSrc: `
-#name test;
-
-#prec (
- #assign foo bar
-);
-
-s
- : foo bar baz
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-baz
- : 'baz';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 1 || fooAssoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc)
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 1 || barAssoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc)
- }
- var bazPrec int
- var bazAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("baz")
- bazPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if bazPrec != precNil || bazAssoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc)
- }
- },
- },
- {
- caption: "a production has the same precedence and associativity as the right-most terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
-);
-
-s
- : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'.
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var sPrec int
- var sAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- sPrec = g.precAndAssoc.productionPredence(ps[0].num)
- sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- if barPrec != precNil || barAssoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc)
- }
- if sPrec != barPrec || sAssoc != barAssoc {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc)
- }
- },
- },
- {
- caption: "a production has the same precedence and associativity as the right-most terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
- #right bar
-);
-
-s
- : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'.
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var sPrec int
- var sAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- sPrec = g.precAndAssoc.productionPredence(ps[0].num)
- sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- if barPrec != 2 || barAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
- }
- if sPrec != barPrec || sAssoc != barAssoc {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc)
- }
- },
- },
- {
- caption: "even if a non-terminal symbol apears to a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
- #right bar
-);
-
-s
- : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'.
- ;
-a
- : bar
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var aPrec int
- var aAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("a")
- ps, _ := g.productionSet.findByLHS(s)
- aPrec = g.precAndAssoc.productionPredence(ps[0].num)
- aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- var sPrec int
- var sAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- sPrec = g.precAndAssoc.productionPredence(ps[0].num)
- sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- if fooPrec != 1 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
- }
- if barPrec != 2 || barAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
- }
- if aPrec != barPrec || aAssoc != barAssoc {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc)
- }
- if sPrec != fooPrec || sAssoc != fooAssoc {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc)
- }
- },
- },
- {
- caption: "each alternative in the same production can have its own precedence and associativity",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
- #right bar
- #assign baz
-);
-
-s
- : foo
- | bar
- | baz
- | bra
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-baz
- : 'baz';
-bra
- : 'bra';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var alt1Prec int
- var alt1Assoc assocType
- var alt2Prec int
- var alt2Assoc assocType
- var alt3Prec int
- var alt3Assoc assocType
- var alt4Prec int
- var alt4Assoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
- alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
- alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
- alt3Prec = g.precAndAssoc.productionPredence(ps[2].num)
- alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num)
- alt4Prec = g.precAndAssoc.productionPredence(ps[3].num)
- alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num)
- }
- if alt1Prec != 1 || alt1Assoc != assocTypeLeft {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc)
- }
- if alt2Prec != 2 || alt2Assoc != assocTypeRight {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc)
- }
- if alt3Prec != 3 || alt3Assoc != assocTypeNil {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt3Prec, alt3Assoc)
- }
- if alt4Prec != precNil || alt4Assoc != assocTypeNil {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc)
- }
- },
- },
- {
- caption: "when a production contains no terminal symbols, the production will not have precedence and associativiry",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
-);
-
-s
- : a
- ;
-a
- : foo
- ;
-
-foo
- : 'foo';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var aPrec int
- var aAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("a")
- ps, _ := g.productionSet.findByLHS(s)
- aPrec = g.precAndAssoc.productionPredence(ps[0].num)
- aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- var sPrec int
- var sAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- sPrec = g.precAndAssoc.productionPredence(ps[0].num)
- sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- if fooPrec != 1 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
- }
- if aPrec != fooPrec || aAssoc != fooAssoc {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc)
- }
- if sPrec != precNil || sAssoc != assocTypeNil {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc)
- }
- },
- },
- {
- caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
-);
-
-s
- : foo bar #prec foo
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var sPrec int
- var sAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- sPrec = g.precAndAssoc.productionPredence(ps[0].num)
- sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- if fooPrec != 1 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
- }
- if sPrec != fooPrec || sAssoc != assocTypeNil {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc)
- }
- },
- },
- {
- caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
- #right bar
-);
-
-s
- : foo bar #prec foo
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var sPrec int
- var sAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- sPrec = g.precAndAssoc.productionPredence(ps[0].num)
- sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- }
- if fooPrec != 1 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
- }
- if barPrec != 2 || barAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
- }
- if sPrec != fooPrec || sAssoc != assocTypeNil {
- t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc)
- }
- },
- },
- {
- caption: "an ordered symbol can appear in a `#left` directive",
- specSrc: `
-#name test;
-
-#prec (
- #left $high
- #right foo bar
- #left $low
-);
-
-s
- : foo #prec $high
- | bar #prec $low
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 2 || fooAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc)
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 2 || barAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc)
- }
- var alt1Prec int
- var alt1Assoc assocType
- var alt2Prec int
- var alt2Assoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
- alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
- alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
- }
- if alt1Prec != 1 || alt1Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc)
- }
- if alt2Prec != 3 || alt2Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc)
- }
- },
- },
- {
- caption: "an ordered symbol can appear in a `#right` directive",
- specSrc: `
-#name test;
-
-#prec (
- #right $high
- #left foo bar
- #right $low
-);
-
-s
- : foo #prec $high
- | bar #prec $low
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 2 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc)
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 2 || barAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc)
- }
- var alt1Prec int
- var alt1Assoc assocType
- var alt2Prec int
- var alt2Assoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
- alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
- alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
- }
- if alt1Prec != 1 || alt1Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc)
- }
- if alt2Prec != 3 || alt2Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc)
- }
- },
- },
- {
- caption: "an ordered symbol can appear in a `#assign` directive",
- specSrc: `
-#name test;
-
-#prec (
- #assign $high
- #left foo
- #right bar
- #assign $low
-);
-
-s
- : foo #prec $high
- | bar #prec $low
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 2 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc)
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 3 || barAssoc != assocTypeRight {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc)
- }
- var alt1Prec int
- var alt1Assoc assocType
- var alt2Prec int
- var alt2Assoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
- alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
- alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
- }
- if alt1Prec != 1 || alt1Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc)
- }
- if alt2Prec != 4 || alt2Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc)
- }
- },
- },
- {
- caption: "names of an ordered symbol and a terminal symbol can duplicate",
- specSrc: `
-#name test;
-
-#prec (
- #left foo bar
- #right $foo
-);
-
-s
- : foo
- | bar #prec $foo
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var fooPrec int
- var fooAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("foo")
- fooPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if fooPrec != 1 || fooAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc)
- }
- if barPrec != 1 || barAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
- }
- var alt1Prec int
- var alt1Assoc assocType
- var alt2Prec int
- var alt2Assoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
- alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
- alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
- }
- if alt1Prec != fooPrec || alt1Assoc != fooAssoc {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc)
- }
- if alt2Prec != 2 || alt2Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc)
- }
- },
- },
- {
- caption: "names of an ordered symbol and a non-terminal symbol can duplicate",
- specSrc: `
-#name test;
-
-#prec (
- #left foo bar
- #right $a
-);
-
-s
- : a
- | bar #prec $a
- ;
-a
- : foo
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- validate: func(t *testing.T, g *Grammar) {
- var barPrec int
- var barAssoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("bar")
- barPrec = g.precAndAssoc.terminalPrecedence(s.Num())
- barAssoc = g.precAndAssoc.terminalAssociativity(s.Num())
- }
- if barPrec != 1 || barAssoc != assocTypeLeft {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc)
- }
- var alt1Prec int
- var alt1Assoc assocType
- var alt2Prec int
- var alt2Assoc assocType
- {
- s, _ := g.symbolTable.ToSymbol("s")
- ps, _ := g.productionSet.findByLHS(s)
- alt1Prec = g.precAndAssoc.productionPredence(ps[0].num)
- alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num)
- alt2Prec = g.precAndAssoc.productionPredence(ps[1].num)
- alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num)
- }
- if alt1Prec != precNil || alt1Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc)
- }
- if alt2Prec != 2 || alt2Assoc != assocTypeNil {
- t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc)
- }
- },
- },
- }
-
- var tests []*okTest
- tests = append(tests, nameTests...)
- tests = append(tests, modeTests...)
- tests = append(tests, precTests...)
-
- for _, test := range tests {
- t.Run(test.caption, func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(test.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := GrammarBuilder{
- AST: ast,
- }
- g, err := b.build()
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- if test.validate != nil {
- test.validate(t, g)
- }
- })
- }
-}
-
-func TestGrammarBuilderSpecError(t *testing.T) {
- type specErrTest struct {
- caption string
- specSrc string
- errs []error
- }
-
- spellingInconsistenciesTests := []*specErrTest{
- {
- caption: "a spelling inconsistency appears among non-terminal symbols",
- specSrc: `
-#name test;
-
-a1
- : a_1
- ;
-a_1
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among terminal symbols",
- specSrc: `
-#name test;
-
-s
- : foo1 foo_1
- ;
-
-foo1
- : 'foo1';
-foo_1
- : 'foo_1';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among non-terminal and terminal symbols",
- specSrc: `
-#name test;
-
-a1
- : a_1
- ;
-
-a_1
- : 'a_1';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same",
- specSrc: `
-#name test;
-
-#prec (
- #assign $p1 $p_1
-);
-
-s
- : foo #prec $p1
- | bar #prec $p_1
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same",
- specSrc: `
-#name test;
-
-#prec (
- #assign $p1
- #assign $p_1
-);
-
-s
- : foo #prec $p1
- | bar #prec $p_1
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among labels the same alternative contains",
- specSrc: `
-#name test;
-
-s
- : foo@l1 foo@l_1
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among labels the same production contains",
- specSrc: `
-#name test;
-
-s
- : foo@l1
- | bar@l_1
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- {
- caption: "a spelling inconsistency appears among labels different productions contain",
- specSrc: `
-#name test;
-
-s
- : foo@l1
- ;
-a
- : bar@l_1
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrSpellingInconsistency},
- },
- }
-
- prodTests := []*specErrTest{
- {
- caption: "a production `b` is unused",
- specSrc: `
-#name test;
-
-a
- : foo
- ;
-b
- : foo
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrUnusedProduction},
- },
- {
- caption: "a terminal symbol `bar` is unused",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-`,
- errs: []error{semErrUnusedTerminal},
- },
- {
- caption: "a production `b` and terminal symbol `bar` is unused",
- specSrc: `
-#name test;
-
-a
- : foo
- ;
-b
- : bar
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-`,
- errs: []error{
- semErrUnusedProduction,
- semErrUnusedTerminal,
- },
- },
- {
- caption: "a production cannot have production directives",
- specSrc: `
-#name test;
-
-s #prec foo
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrInvalidProdDir},
- },
- {
- caption: "a lexical production cannot have alternative directives",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : 'foo' #skip;
-`,
- errs: []error{semErrInvalidAltDir},
- },
- {
- caption: "a production directive must not be duplicated",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo #skip #skip
- : 'foo';
-`,
- errs: []error{semErrDuplicateDir},
- },
- {
- caption: "an alternative directive must not be duplicated",
- specSrc: `
-#name test;
-
-s
- : foo bar #ast foo bar #ast foo bar
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDuplicateDir},
- },
- {
- caption: "a production must not have a duplicate alternative (non-empty alternatives)",
- specSrc: `
-#name test;
-
-s
- : foo
- | foo
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDuplicateProduction},
- },
- {
- caption: "a production must not have a duplicate alternative (non-empty and split alternatives)",
- specSrc: `
-#name test;
-
-s
- : foo
- | a
- ;
-a
- : bar
- ;
-s
- : foo
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-`,
- errs: []error{semErrDuplicateProduction},
- },
- {
- caption: "a production must not have a duplicate alternative (empty alternatives)",
- specSrc: `
-#name test;
-
-s
- : foo
- | a
- ;
-a
- :
- |
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDuplicateProduction},
- },
- {
- caption: "a production must not have a duplicate alternative (empty and split alternatives)",
- specSrc: `
-#name test;
-
-s
- : foo
- | a
- ;
-a
- :
- | foo
- ;
-a
- :
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDuplicateProduction},
- },
- {
- caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : "foo";
-s
- : "a";
-`,
- errs: []error{semErrDuplicateName},
- },
- {
- caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates",
- specSrc: `
-#name test;
-
-s
- : foo
- | a
- ;
-a
- : bar
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-a
- : "a";
-`,
- errs: []error{semErrDuplicateName},
- },
- {
- caption: "an invalid top-level directive",
- specSrc: `
-#name test;
-
-#foo;
-
-s
- : a
- ;
-
-a
- : 'a';
-`,
- errs: []error{semErrDirInvalidName},
- },
- {
- caption: "a label must be unique in an alternative",
- specSrc: `
-#name test;
-
-s
- : foo@x bar@x
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDuplicateLabel},
- },
- {
- caption: "a label cannot be the same name as terminal symbols",
- specSrc: `
-#name test;
-
-s
- : foo bar@foo
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDuplicateLabel},
- },
- {
- caption: "a label cannot be the same name as non-terminal symbols",
- specSrc: `
-#name test;
-
-s
- : foo@a
- | a
- ;
-a
- : bar
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{
- semErrInvalidLabel,
- },
- },
- }
-
- nameDirTests := []*specErrTest{
- {
- caption: "the `#name` directive is required",
- specSrc: `
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrNoGrammarName},
- },
- {
- caption: "the `#name` directive needs an ID parameter",
- specSrc: `
-#name;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#name` directive cannot take a pattern parameter",
- specSrc: `
-#name "test";
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#name` directive cannot take a string parameter",
- specSrc: `
-#name 'test';
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#name` directive takes just one parameter",
- specSrc: `
-#name test1 test2;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- precDirTests := []*specErrTest{
- {
- caption: "the `#prec` directive needs a directive group parameter",
- specSrc: `
-#name test;
-
-#prec;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take an ID parameter",
- specSrc: `
-#name test;
-
-#prec foo;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec $x;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-#prec "foo";
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-#prec 'foo';
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive takes just one directive group parameter",
- specSrc: `
-#name test;
-
-#prec () ();
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- leftDirTests := []*specErrTest{
- {
- caption: "the `#left` directive needs ID parameters",
- specSrc: `
-#name test;
-
-#prec (
- #left
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` directive cannot be applied to an error symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left error
-);
-
-s
- : foo semi_colon
- | error semi_colon
- ;
-
-foo
- : 'foo';
-semi_colon
- : ';';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` directive cannot take an undefined symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left x
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` directive cannot take a non-terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left s
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-#prec (
- #left "foo"
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-#prec (
- #left 'foo'
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` directive cannot take a directive parameter",
- specSrc: `
-#name test;
-
-#prec (
- #left ()
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left foo foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol",
- specSrc: `
-#name test;
-
-#prec (
- #left $x $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "a terminal symbol cannot have different precedence",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
- #left foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "an ordered symbol cannot have different precedence",
- specSrc: `
-#name test;
-
-#prec (
- #left $x
- #left $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "a terminal symbol cannot have different associativity",
- specSrc: `
-#name test;
-
-#prec (
- #right foo
- #left foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "an ordered symbol cannot have different associativity",
- specSrc: `
-#name test;
-
-#prec (
- #right $x
- #left $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- }
-
- rightDirTests := []*specErrTest{
- {
- caption: "the `#right` directive needs ID parameters",
- specSrc: `
-#name test;
-
-#prec (
- #right
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot be applied to an error symbol",
- specSrc: `
-#name test;
-
-#prec (
- #right error
-);
-
-s
- : foo semi_colon
- | error semi_colon
- ;
-
-foo
- : 'foo';
-semi_colon
- : ';';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot take an undefined symbol",
- specSrc: `
-#name test;
-
-#prec (
- #right x
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot take a non-terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #right s
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-#prec (
- #right "foo"
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-#prec (
- #right 'foo'
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot take a directive group parameter",
- specSrc: `
-#name test;
-
-#prec (
- #right ()
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#right` directive cannot be specified multiple times for a terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #right foo foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "the `#right` directive cannot be specified multiple times for an ordered symbol",
- specSrc: `
-#name test;
-
-#prec (
- #right $x $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "a terminal symbol cannot have different precedence",
- specSrc: `
-#name test;
-
-#prec (
- #right foo
- #right foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "an ordered symbol cannot have different precedence",
- specSrc: `
-#name test;
-
-#prec (
- #right $x
- #right $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "a terminal symbol cannot have different associativity",
- specSrc: `
-#name test;
-
-#prec (
- #left foo
- #right foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "an ordered symbol cannot have different associativity",
- specSrc: `
-#name test;
-
-#prec (
- #left $x
- #right $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- }
-
- assignDirTests := []*specErrTest{
- {
- caption: "the `#assign` directive needs ID parameters",
- specSrc: `
-#name test;
-
-#prec (
- #assign
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` directive cannot be applied to an error symbol",
- specSrc: `
-#name test;
-
-#prec (
- #assign error
-);
-
-s
- : foo semi_colon
- | error semi_colon
- ;
-
-foo
- : 'foo';
-semi_colon
- : ';';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` directive cannot take an undefined symbol",
- specSrc: `
-#name test;
-
-#prec (
- #assign x
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` directive cannot take a non-terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #assign s
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign "foo"
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign 'foo'
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` directive cannot take a directive parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign ()
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol",
- specSrc: `
-#name test;
-
-#prec (
- #assign foo foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "a terminal symbol cannot have different precedence",
- specSrc: `
-#name test;
-
-#prec (
- #assign foo
- #assign foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "an ordered symbol cannot have different precedence",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
- #assign $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "a terminal symbol cannot have different associativity",
- specSrc: `
-#name test;
-
-#prec (
- #assign foo
- #left foo
-);
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- {
- caption: "an ordered symbol cannot have different associativity",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
- #left $x
-);
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateAssoc},
- },
- }
-
- errorSymTests := []*specErrTest{
- {
- caption: "cannot use the error symbol as a non-terminal symbol",
- specSrc: `
-#name test;
-
-s
- : error
- ;
-error
- : foo
- ;
-
-foo: 'foo';
-`,
- errs: []error{
- semErrErrSymIsReserved,
- semErrDuplicateName,
- },
- },
- {
- caption: "cannot use the error symbol as a terminal symbol",
- specSrc: `
-#name test;
-
-s
- : error
- ;
-
-error: 'error';
-`,
- errs: []error{semErrErrSymIsReserved},
- },
- {
- caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : 'foo';
-error #skip
- : 'error';
-`,
- errs: []error{semErrErrSymIsReserved},
- },
- }
-
- astDirTests := []*specErrTest{
- {
- caption: "the `#ast` directive needs ID or label prameters",
- specSrc: `
-#name test;
-
-s
- : foo #ast
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#ast` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
-);
-
-s
- : foo #ast $x
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#ast` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo #ast "foo"
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#ast` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo #ast 'foo'
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#ast` directive cannot take a directive group parameter",
- specSrc: `
-#name test;
-
-s
- : foo #ast ()
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative",
- specSrc: `
-#name test;
-
-s
- : foo bar #ast foo x
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive",
- specSrc: `
-#name test;
-
-s
- : foo #ast bar
- | bar
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "a label in a different alternative cannot be a parameter of the `#ast` directive",
- specSrc: `
-#name test;
-
-s
- : foo #ast b
- | bar@b
- ;
-
-foo
- : "foo";
-bar
- : "bar";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "a symbol can appear in the `#ast` directive only once",
- specSrc: `
-#name test;
-
-s
- : foo #ast foo foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateElem},
- },
- {
- caption: "a label can appear in the `#ast` directive only once",
- specSrc: `
-#name test;
-
-s
- : foo@x #ast x x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateElem},
- },
- {
- caption: "a symbol can appear in the `#ast` directive only once, even if the symbol has a label",
- specSrc: `
-#name test;
-
-s
- : foo@x #ast foo x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDuplicateElem},
- },
- {
- caption: "symbol `foo` is ambiguous because it appears in an alternative twice",
- specSrc: `
-#name test;
-
-s
- : foo foo #ast foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrAmbiguousElem},
- },
- {
- caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label",
- specSrc: `
-#name test;
-
-s
- : foo@x foo #ast foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrAmbiguousElem},
- },
- {
- caption: "the expansion operator cannot be applied to a terminal symbol",
- specSrc: `
-#name test;
-
-s
- : foo #ast foo...
- ;
-
-foo
- : "foo";
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- altPrecDirTests := []*specErrTest{
- {
- caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter",
- specSrc: `
-#name test;
-
-s
- : foo #prec
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot be applied to an error symbol",
- specSrc: `
-#name test;
-
-s
- : foo #prec error
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take an undefined symbol",
- specSrc: `
-#name test;
-
-s
- : foo #prec x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take a non-terminal symbol",
- specSrc: `
-#name test;
-
-s
- : a #prec b
- | b
- ;
-a
- : foo
- ;
-b
- : bar
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take an undefined ordered symbol parameter",
- specSrc: `
-#name test;
-
-s
- : foo #prec $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrUndefinedOrdSym},
- },
- {
- caption: "the `#prec` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo #prec "foo"
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo #prec 'foo'
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#prec` directive cannot take a directive parameter",
- specSrc: `
-#name test;
-
-s
- : foo #prec ()
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "a symbol the `#prec` directive takes must be given precedence explicitly",
- specSrc: `
-#name test;
-
-s
- : foo bar #prec foo
- ;
-
-foo
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrUndefinedPrec},
- },
- }
-
- recoverDirTests := []*specErrTest{
- {
- caption: "the `#recover` directive cannot take an ID parameter",
- specSrc: `
-#name test;
-
-s
- : foo #recover foo
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#recover` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
-);
-
-s
- : foo #recover $x
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#recover` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo #recover "foo"
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#recover` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo #recover 'foo'
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#recover` directive cannot take a directive group parameter",
- specSrc: `
-#name test;
-
-s
- : foo #recover ()
- ;
-
-foo
- : 'foo';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- fragmentTests := []*specErrTest{
- {
- caption: "a production cannot contain a fragment",
- specSrc: `
-#name test;
-
-s
- : f
- ;
-
-fragment f
- : 'fragment';
-`,
- errs: []error{semErrUndefinedSym},
- },
- {
- caption: "fragments cannot be duplicated",
- specSrc: `
-#name test;
-
-s
- : foo
- ;
-
-foo
- : "\f{f}";
-fragment f
- : 'fragment 1';
-fragment f
- : 'fragment 2';
-`,
- errs: []error{semErrDuplicateFragment},
- },
- }
-
- modeDirTests := []*specErrTest{
- {
- caption: "the `#mode` directive needs an ID parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#mode` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
-);
-
-s
- : foo bar
- ;
-
-foo
- : 'foo';
-bar #mode $x
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#mode` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode "mode_1"
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#mode` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode 'mode_1'
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#mode` directive cannot take a directive group parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode ()
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- pushDirTests := []*specErrTest{
- {
- caption: "the `#push` directive needs an ID parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push
- : 'foo';
-bar #mode mode_1
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#push` directive takes just one ID parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push mode_1 mode_2
- : 'foo';
-bar #mode mode_1
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#push` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
-);
-
-s
- : foo bar
- ;
-
-foo #push $x
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#push` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push "mode_1"
- : 'foo';
-bar #mode mode_1
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#push` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push 'mode_1'
- : 'foo';
-bar #mode mode_1
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#push` directive cannot take a directive group parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #push ()
- : 'foo';
-bar #mode mode_1
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- popDirTests := []*specErrTest{
- {
- caption: "the `#pop` directive cannot take an ID parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar baz
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode mode_1
- : 'bar';
-baz #pop mode_1
- : 'baz';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#pop` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
-);
-
-s
- : foo bar baz
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode mode_1
- : 'bar';
-baz #pop $x
- : 'baz';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#pop` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar baz
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode mode_1
- : 'bar';
-baz #pop "mode_1"
- : 'baz';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#pop` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar baz
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode mode_1
- : 'bar';
-baz #pop 'mode_1'
- : 'baz';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#pop` directive cannot take a directive parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar baz
- ;
-
-foo #push mode_1
- : 'foo';
-bar #mode mode_1
- : 'bar';
-baz #pop ()
- : 'baz';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- }
-
- skipDirTests := []*specErrTest{
- {
- caption: "the `#skip` directive cannot take an ID parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #skip bar
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#skip` directive cannot take an ordered symbol parameter",
- specSrc: `
-#name test;
-
-#prec (
- #assign $x
-);
-
-s
- : foo bar
- ;
-
-foo #skip $x
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#skip` directive cannot take a pattern parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #skip "bar"
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#skip` directive cannot take a string parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #skip 'bar'
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "the `#skip` directive cannot take a directive group parameter",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #skip ()
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrDirInvalidParam},
- },
- {
- caption: "a terminal symbol used in productions cannot have the skip directive",
- specSrc: `
-#name test;
-
-s
- : foo bar
- ;
-
-foo #skip
- : 'foo';
-bar
- : 'bar';
-`,
- errs: []error{semErrTermCannotBeSkipped},
- },
- }
-
- var tests []*specErrTest
- tests = append(tests, spellingInconsistenciesTests...)
- tests = append(tests, prodTests...)
- tests = append(tests, nameDirTests...)
- tests = append(tests, precDirTests...)
- tests = append(tests, leftDirTests...)
- tests = append(tests, rightDirTests...)
- tests = append(tests, assignDirTests...)
- tests = append(tests, errorSymTests...)
- tests = append(tests, astDirTests...)
- tests = append(tests, altPrecDirTests...)
- tests = append(tests, recoverDirTests...)
- tests = append(tests, fragmentTests...)
- tests = append(tests, modeDirTests...)
- tests = append(tests, pushDirTests...)
- tests = append(tests, popDirTests...)
- tests = append(tests, skipDirTests...)
- for _, test := range tests {
- t.Run(test.caption, func(t *testing.T) {
- ast, err := parser.Parse(strings.NewReader(test.specSrc))
- if err != nil {
- t.Fatal(err)
- }
-
- b := GrammarBuilder{
- AST: ast,
- }
- _, err = b.build()
- if err == nil {
- t.Fatal("an expected error didn't occur")
- }
- specErrs, ok := err.(verr.SpecErrors)
- if !ok {
- t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err)
- }
- if len(specErrs) != len(test.errs) {
- t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs)
- }
- for _, expected := range test.errs {
- for _, actual := range specErrs {
- if actual.Cause == expected {
- return
- }
- }
- }
- t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs)
- })
- }
-}
diff --git a/grammar/item.go b/grammar/item.go
deleted file mode 100644
index aa1ecaf..0000000
--- a/grammar/item.go
+++ /dev/null
@@ -1,206 +0,0 @@
-package grammar
-
-import (
- "crypto/sha256"
- "encoding/binary"
- "fmt"
- "sort"
- "strconv"
-
- "grammar/symbol"
-)
-
-type lrItemID [32]byte
-
-func (id lrItemID) String() string {
- return fmt.Sprintf("%x", id.num())
-}
-
-func (id lrItemID) num() uint32 {
- return binary.LittleEndian.Uint32(id[:])
-}
-
-type lookAhead struct {
- symbols map[symbol.Symbol]struct{}
-
- // When propagation is true, an item propagates look-ahead symbols to other items.
- propagation bool
-}
-
-type lrItem struct {
- id lrItemID
- prod productionID
-
- // E → E + T
- //
- // Dot | Dotted Symbol | Item
- // ----+---------------+------------
- // 0 | E | E →・E + T
- // 1 | + | E → E・+ T
- // 2 | T | E → E +・T
- // 3 | Nil | E → E + T・
- dot int
- dottedSymbol symbol.Symbol
-
- // When initial is true, the LHS of the production is the augmented start symbol and dot is 0.
- // It looks like S' →・S.
- initial bool
-
- // When reducible is true, the item looks like E → E + T・.
- reducible bool
-
- // When kernel is true, the item is kernel item.
- kernel bool
-
- // lookAhead stores look-ahead symbols, and they are terminal symbols.
- // The item is reducible only when the look-ahead symbols appear as the next input symbol.
- lookAhead lookAhead
-}
-
-func newLR0Item(prod *production, dot int) (*lrItem, error) {
- if prod == nil {
- return nil, fmt.Errorf("production must be non-nil")
- }
-
- if dot < 0 || dot > prod.rhsLen {
- return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen)
- }
-
- var id lrItemID
- {
- b := []byte{}
- b = append(b, prod.id[:]...)
- bDot := make([]byte, 8)
- binary.LittleEndian.PutUint64(bDot, uint64(dot))
- b = append(b, bDot...)
- id = sha256.Sum256(b)
- }
-
- dottedSymbol := symbol.SymbolNil
- if dot < prod.rhsLen {
- dottedSymbol = prod.rhs[dot]
- }
-
- initial := false
- if prod.lhs.IsStart() && dot == 0 {
- initial = true
- }
-
- reducible := false
- if dot == prod.rhsLen {
- reducible = true
- }
-
- kernel := false
- if initial || dot > 0 {
- kernel = true
- }
-
- item := &lrItem{
- id: id,
- prod: prod.id,
- dot: dot,
- dottedSymbol: dottedSymbol,
- initial: initial,
- reducible: reducible,
- kernel: kernel,
- }
-
- return item, nil
-}
-
-type kernelID [32]byte
-
-func (id kernelID) String() string {
- return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:]))
-}
-
-type kernel struct {
- id kernelID
- items []*lrItem
-}
-
-func newKernel(items []*lrItem) (*kernel, error) {
- if len(items) == 0 {
- return nil, fmt.Errorf("a kernel need at least one item")
- }
-
- // Remove duplicates from items.
- var sortedItems []*lrItem
- {
- m := map[lrItemID]*lrItem{}
- for _, item := range items {
- if !item.kernel {
- return nil, fmt.Errorf("not a kernel item: %v", item)
- }
- m[item.id] = item
- }
- sortedItems = []*lrItem{}
- for _, item := range m {
- sortedItems = append(sortedItems, item)
- }
- sort.Slice(sortedItems, func(i, j int) bool {
- return sortedItems[i].id.num() < sortedItems[j].id.num()
- })
- }
-
- var id kernelID
- {
- b := []byte{}
- for _, item := range sortedItems {
- b = append(b, item.id[:]...)
- }
- id = sha256.Sum256(b)
- }
-
- return &kernel{
- id: id,
- items: sortedItems,
- }, nil
-}
-
-type stateNum int
-
-const stateNumInitial = stateNum(0)
-
-func (n stateNum) Int() int {
- return int(n)
-}
-
-func (n stateNum) String() string {
- return strconv.Itoa(int(n))
-}
-
-func (n stateNum) next() stateNum {
- return stateNum(n + 1)
-}
-
-type lrState struct {
- *kernel
- num stateNum
- next map[symbol.Symbol]kernelID
- reducible map[productionID]struct{}
-
- // emptyProdItems stores items that have an empty production like `p → ε` and is reducible.
- // Thus the items emptyProdItems stores are like `p → ・ε`. emptyProdItems is needed to store
- // look-ahead symbols because the kernel items don't include these items.
- //
- // For instance, we have the following productions, and A is a terminal symbol.
- //
- // s' → s
- // s → A | ε
- //
- // CLOSURE({s' → ・s}) generates the following closure, but the kernel of this closure doesn't
- // include `s → ・ε`.
- //
- // s' → ・s
- // s → ・A
- // s → ・ε
- emptyProdItems []*lrItem
-
- // When isErrorTrapper is `true`, the item can shift the `error` symbol. The item has the following form.
- // The `α` and `β` can be empty.
- //
- // A → α・error β
- isErrorTrapper bool
-}
diff --git a/grammar/lalr1.go b/grammar/lalr1.go
deleted file mode 100644
index 4c7265d..0000000
--- a/grammar/lalr1.go
+++ /dev/null
@@ -1,318 +0,0 @@
-package grammar
-
-import (
- "fmt"
-
- "grammar/symbol"
-)
-
-type stateAndLRItem struct {
- kernelID kernelID
- itemID lrItemID
-}
-
-type propagation struct {
- src *stateAndLRItem
- dest []*stateAndLRItem
-}
-
-type lalr1Automaton struct {
- *lr0Automaton
-}
-
-func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet) (*lalr1Automaton, error) {
- // Set the look-ahead symbol <EOF> to the initial item: [S' → ・S, $]
- iniState := lr0.states[lr0.initialState]
- iniState.items[0].lookAhead.symbols = map[symbol.Symbol]struct{}{
- symbol.SymbolEOF: {},
- }
-
- var props []*propagation
- for _, state := range lr0.states {
- for _, kItem := range state.items {
- items, err := genLALR1Closure(kItem, prods, first)
- if err != nil {
- return nil, err
- }
-
- kItem.lookAhead.propagation = true
-
- var propDests []*stateAndLRItem
- for _, item := range items {
- if item.reducible {
- p, ok := prods.findByID(item.prod)
- if !ok {
- return nil, fmt.Errorf("production not found: %v", item.prod)
- }
-
- if p.isEmpty() {
- var reducibleItem *lrItem
- for _, it := range state.emptyProdItems {
- if it.id != item.id {
- continue
- }
-
- reducibleItem = it
- break
- }
- if reducibleItem == nil {
- return nil, fmt.Errorf("reducible item not found: %v", item.id)
- }
- if reducibleItem.lookAhead.symbols == nil {
- reducibleItem.lookAhead.symbols = map[symbol.Symbol]struct{}{}
- }
- for a := range item.lookAhead.symbols {
- reducibleItem.lookAhead.symbols[a] = struct{}{}
- }
-
- propDests = append(propDests, &stateAndLRItem{
- kernelID: state.id,
- itemID: item.id,
- })
- }
-
- continue
- }
-
- nextKID := state.next[item.dottedSymbol]
- var nextItemID lrItemID
- {
- p, ok := prods.findByID(item.prod)
- if !ok {
- return nil, fmt.Errorf("production not found: %v", item.prod)
- }
- it, err := newLR0Item(p, item.dot+1)
- if err != nil {
- return nil, fmt.Errorf("failed to generate an item ID: %v", err)
- }
- nextItemID = it.id
- }
-
- if item.lookAhead.propagation {
- propDests = append(propDests, &stateAndLRItem{
- kernelID: nextKID,
- itemID: nextItemID,
- })
- } else {
- nextState := lr0.states[nextKID]
- var nextItem *lrItem
- for _, it := range nextState.items {
- if it.id != nextItemID {
- continue
- }
- nextItem = it
- break
- }
- if nextItem == nil {
- return nil, fmt.Errorf("item not found: %v", nextItemID)
- }
-
- if nextItem.lookAhead.symbols == nil {
- nextItem.lookAhead.symbols = map[symbol.Symbol]struct{}{}
- }
-
- for a := range item.lookAhead.symbols {
- nextItem.lookAhead.symbols[a] = struct{}{}
- }
- }
- }
- if len(propDests) == 0 {
- continue
- }
-
- props = append(props, &propagation{
- src: &stateAndLRItem{
- kernelID: state.id,
- itemID: kItem.id,
- },
- dest: propDests,
- })
- }
- }
-
- err := propagateLookAhead(lr0, props)
- if err != nil {
- return nil, fmt.Errorf("failed to propagate look-ahead symbols: %v", err)
- }
-
- return &lalr1Automaton{
- lr0Automaton: lr0,
- }, nil
-}
-
-func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]*lrItem, error) {
- items := []*lrItem{}
- knownItems := map[lrItemID]map[symbol.Symbol]struct{}{}
- knownItemsProp := map[lrItemID]struct{}{}
- uncheckedItems := []*lrItem{}
- items = append(items, srcItem)
- uncheckedItems = append(uncheckedItems, srcItem)
- for len(uncheckedItems) > 0 {
- nextUncheckedItems := []*lrItem{}
- for _, item := range uncheckedItems {
- if item.dottedSymbol.IsTerminal() {
- continue
- }
-
- p, ok := prods.findByID(item.prod)
- if !ok {
- return nil, fmt.Errorf("production not found: %v", item.prod)
- }
-
- var fstSyms []symbol.Symbol
- var isFstNullable bool
- {
- fst, err := first.find(p, item.dot+1)
- if err != nil {
- return nil, err
- }
-
- fstSyms = make([]symbol.Symbol, len(fst.symbols))
- i := 0
- for s := range fst.symbols {
- fstSyms[i] = s
- i++
- }
- if fst.empty {
- isFstNullable = true
- }
- }
-
- ps, _ := prods.findByLHS(item.dottedSymbol)
- for _, prod := range ps {
- var lookAhead []symbol.Symbol
- {
- var lookAheadCount int
- if isFstNullable {
- lookAheadCount = len(fstSyms) + len(item.lookAhead.symbols)
- } else {
- lookAheadCount = len(fstSyms)
- }
-
- lookAhead = make([]symbol.Symbol, lookAheadCount)
- i := 0
- for _, s := range fstSyms {
- lookAhead[i] = s
- i++
- }
- if isFstNullable {
- for a := range item.lookAhead.symbols {
- lookAhead[i] = a
- i++
- }
- }
- }
-
- for _, a := range lookAhead {
- newItem, err := newLR0Item(prod, 0)
- if err != nil {
- return nil, err
- }
- if items, exist := knownItems[newItem.id]; exist {
- if _, exist := items[a]; exist {
- continue
- }
- }
-
- newItem.lookAhead.symbols = map[symbol.Symbol]struct{}{
- a: {},
- }
-
- items = append(items, newItem)
- if knownItems[newItem.id] == nil {
- knownItems[newItem.id] = map[symbol.Symbol]struct{}{}
- }
- knownItems[newItem.id][a] = struct{}{}
- nextUncheckedItems = append(nextUncheckedItems, newItem)
- }
-
- if isFstNullable {
- newItem, err := newLR0Item(prod, 0)
- if err != nil {
- return nil, err
- }
- if _, exist := knownItemsProp[newItem.id]; exist {
- continue
- }
-
- newItem.lookAhead.propagation = true
-
- items = append(items, newItem)
- knownItemsProp[newItem.id] = struct{}{}
- nextUncheckedItems = append(nextUncheckedItems, newItem)
- }
- }
- }
- uncheckedItems = nextUncheckedItems
- }
-
- return items, nil
-}
-
-func propagateLookAhead(lr0 *lr0Automaton, props []*propagation) error {
- for {
- changed := false
- for _, prop := range props {
- srcState, ok := lr0.states[prop.src.kernelID]
- if !ok {
- return fmt.Errorf("source state not found: %v", prop.src.kernelID)
- }
- var srcItem *lrItem
- for _, item := range srcState.items {
- if item.id != prop.src.itemID {
- continue
- }
- srcItem = item
- break
- }
- if srcItem == nil {
- return fmt.Errorf("source item not found: %v", prop.src.itemID)
- }
-
- for _, dest := range prop.dest {
- destState, ok := lr0.states[dest.kernelID]
- if !ok {
- return fmt.Errorf("destination state not found: %v", dest.kernelID)
- }
- var destItem *lrItem
- for _, item := range destState.items {
- if item.id != dest.itemID {
- continue
- }
- destItem = item
- break
- }
- if destItem == nil {
- for _, item := range destState.emptyProdItems {
- if item.id != dest.itemID {
- continue
- }
- destItem = item
- break
- }
- if destItem == nil {
- return fmt.Errorf("destination item not found: %v", dest.itemID)
- }
- }
-
- for a := range srcItem.lookAhead.symbols {
- if _, ok := destItem.lookAhead.symbols[a]; ok {
- continue
- }
-
- if destItem.lookAhead.symbols == nil {
- destItem.lookAhead.symbols = map[symbol.Symbol]struct{}{}
- }
-
- destItem.lookAhead.symbols[a] = struct{}{}
- changed = true
- }
- }
- }
- if !changed {
- break
- }
- }
-
- return nil
-}
diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go
deleted file mode 100644
index d08468d..0000000
--- a/grammar/lalr1_test.go
+++ /dev/null
@@ -1,187 +0,0 @@
-package grammar
-
-import (
- "strings"
- "testing"
-
- "grammar/symbol"
- "spec/grammar/parser"
-)
-
-func TestGenLALR1Automaton(t *testing.T) {
- // This grammar belongs to LALR(1) class, not SLR(1).
- src := `
-#name test;
-
-s: l eq r | r;
-l: ref r | id;
-r: l;
-eq: '=';
-ref: '*';
-id: "[A-Za-z0-9_]+";
-`
-
- var gram *Grammar
- var automaton *lalr1Automaton
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
-
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatalf("failed to create a LR0 automaton: %v", err)
- }
-
- firstSet, err := genFirstSet(gram.productionSet)
- if err != nil {
- t.Fatalf("failed to create a FIRST set: %v", err)
- }
-
- automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet)
- if err != nil {
- t.Fatalf("failed to create a LALR1 automaton: %v", err)
- }
- if automaton == nil {
- t.Fatalf("genLALR1Automaton returns nil without any error")
- }
- }
-
- initialState := automaton.states[automaton.initialState]
- if initialState == nil {
- t.Errorf("failed to get an initial status: %v", automaton.initialState)
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
- },
- 1: {
- withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
- },
- 2: {
- withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
- withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
- },
- 3: {
- withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
- },
- 4: {
- withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 5: {
- withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
- },
- 6: {
- withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
- },
- 7: {
- withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 8: {
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
- },
- 9: {
- withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
- },
- }
-
- expectedStates := []*expectedLRState{
- {
- kernelItems: expectedKernels[0],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("s"): expectedKernels[1],
- genSym("l"): expectedKernels[2],
- genSym("r"): expectedKernels[3],
- genSym("ref"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[1],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s'", "s"),
- },
- },
- {
- kernelItems: expectedKernels[2],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("eq"): expectedKernels[6],
- },
- reducibleProds: []*production{
- genProd("r", "l"),
- },
- },
- {
- kernelItems: expectedKernels[3],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s", "r"),
- },
- },
- {
- kernelItems: expectedKernels[4],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("r"): expectedKernels[7],
- genSym("l"): expectedKernels[8],
- genSym("ref"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[5],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("l", "id"),
- },
- },
- {
- kernelItems: expectedKernels[6],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("r"): expectedKernels[9],
- genSym("l"): expectedKernels[8],
- genSym("ref"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[7],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("l", "ref", "r"),
- },
- },
- {
- kernelItems: expectedKernels[8],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("r", "l"),
- },
- },
- {
- kernelItems: expectedKernels[9],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s", "l", "eq", "r"),
- },
- },
- }
-
- testLRAutomaton(t, expectedStates, automaton.lr0Automaton)
-}
diff --git a/grammar/lexical/compiler.go b/grammar/lexical/compiler.go
deleted file mode 100644
index 06e4068..0000000
--- a/grammar/lexical/compiler.go
+++ /dev/null
@@ -1,413 +0,0 @@
-package lexical
-
-import (
- "bytes"
- "fmt"
-
- "compressor"
- "grammar/lexical/dfa"
- psr "grammar/lexical/parser"
- spec "spec/grammar"
-)
-
-type CompileError struct {
- Kind spec.LexKindName
- Fragment bool
- Cause error
- Detail string
-}
-
-func Compile(lexspec *LexSpec, compLv int) (*spec.LexicalSpec, error, []*CompileError) {
- err := lexspec.Validate()
- if err != nil {
- return nil, fmt.Errorf("invalid lexical specification:\n%w", err), nil
- }
-
- modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries)
-
- modeSpecs := []*spec.CompiledLexModeSpec{
- nil,
- }
- for i, es := range modeEntries[1:] {
- modeName := modeNames[i+1]
- modeSpec, err, cerrs := compile(es, modeName2ID, fragmetns, compLv)
- if err != nil {
- return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err), cerrs
- }
- modeSpecs = append(modeSpecs, modeSpec)
- }
-
- var kindNames []spec.LexKindName
- var name2ID map[spec.LexKindName]spec.LexKindID
- {
- name2ID = map[spec.LexKindName]spec.LexKindID{}
- id := spec.LexKindIDMin
- for _, modeSpec := range modeSpecs[1:] {
- for _, name := range modeSpec.KindNames[1:] {
- if _, ok := name2ID[name]; ok {
- continue
- }
- name2ID[name] = id
- id++
- }
- }
-
- kindNames = make([]spec.LexKindName, len(name2ID)+1)
- for name, id := range name2ID {
- kindNames[id] = name
- }
- }
-
- var kindIDs [][]spec.LexKindID
- {
- kindIDs = make([][]spec.LexKindID, len(modeSpecs))
- for i, modeSpec := range modeSpecs[1:] {
- ids := make([]spec.LexKindID, len(modeSpec.KindNames))
- for modeID, name := range modeSpec.KindNames {
- if modeID == 0 {
- continue
- }
- ids[modeID] = name2ID[name]
- }
- kindIDs[i+1] = ids
- }
- }
-
- return &spec.LexicalSpec{
- InitialModeID: spec.LexModeIDDefault,
- ModeNames: modeNames,
- KindNames: kindNames,
- KindIDs: kindIDs,
- CompressionLevel: compLv,
- Specs: modeSpecs,
- }, nil, nil
-}
-
-func groupEntriesByLexMode(entries []*LexEntry) ([][]*LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[spec.LexKindName]*LexEntry) {
- modeNames := []spec.LexModeName{
- spec.LexModeNameNil,
- spec.LexModeNameDefault,
- }
- modeName2ID := map[spec.LexModeName]spec.LexModeID{
- spec.LexModeNameNil: spec.LexModeIDNil,
- spec.LexModeNameDefault: spec.LexModeIDDefault,
- }
- lastModeID := spec.LexModeIDDefault
- modeEntries := [][]*LexEntry{
- nil,
- {},
- }
- fragments := map[spec.LexKindName]*LexEntry{}
- for _, e := range entries {
- if e.Fragment {
- fragments[e.Kind] = e
- continue
- }
- ms := e.Modes
- if len(ms) == 0 {
- ms = []spec.LexModeName{
- spec.LexModeNameDefault,
- }
- }
- for _, modeName := range ms {
- modeID, ok := modeName2ID[modeName]
- if !ok {
- modeID = lastModeID + 1
- lastModeID = modeID
- modeName2ID[modeName] = modeID
- modeNames = append(modeNames, modeName)
- modeEntries = append(modeEntries, []*LexEntry{})
- }
- modeEntries[modeID] = append(modeEntries[modeID], e)
- }
- }
- return modeEntries, modeNames, modeName2ID, fragments
-}
-
-func compile(
- entries []*LexEntry,
- modeName2ID map[spec.LexModeName]spec.LexModeID,
- fragments map[spec.LexKindName]*LexEntry,
- compLv int,
-) (*spec.CompiledLexModeSpec, error, []*CompileError) {
- var kindNames []spec.LexKindName
- kindIDToName := map[spec.LexModeKindID]spec.LexKindName{}
- var patterns map[spec.LexModeKindID][]byte
- {
- kindNames = append(kindNames, spec.LexKindNameNil)
- patterns = map[spec.LexModeKindID][]byte{}
- for i, e := range entries {
- kindID := spec.LexModeKindID(i + 1)
-
- kindNames = append(kindNames, e.Kind)
- kindIDToName[kindID] = e.Kind
- patterns[kindID] = []byte(e.Pattern)
- }
- }
-
- push := []spec.LexModeID{
- spec.LexModeIDNil,
- }
- pop := []int{
- 0,
- }
- for _, e := range entries {
- pushV := spec.LexModeIDNil
- if e.Push != "" {
- pushV = modeName2ID[e.Push]
- }
- push = append(push, pushV)
- popV := 0
- if e.Pop {
- popV = 1
- }
- pop = append(pop, popV)
- }
-
- fragmentPatterns := map[spec.LexKindName][]byte{}
- for k, e := range fragments {
- fragmentPatterns[k] = []byte(e.Pattern)
- }
-
- fragmentCPTrees := make(map[spec.LexKindName]psr.CPTree, len(fragmentPatterns))
- {
- var cerrs []*CompileError
- for kind, pat := range fragmentPatterns {
- p := psr.NewParser(kind, bytes.NewReader(pat))
- t, err := p.Parse()
- if err != nil {
- if err == psr.ParseErr {
- detail, cause := p.Error()
- cerrs = append(cerrs, &CompileError{
- Kind: kind,
- Fragment: true,
- Cause: cause,
- Detail: detail,
- })
- } else {
- cerrs = append(cerrs, &CompileError{
- Kind: kind,
- Fragment: true,
- Cause: err,
- })
- }
- continue
- }
- fragmentCPTrees[kind] = t
- }
- if len(cerrs) > 0 {
- return nil, fmt.Errorf("compile error"), cerrs
- }
-
- err := psr.CompleteFragments(fragmentCPTrees)
- if err != nil {
- if err == psr.ParseErr {
- for _, frag := range fragmentCPTrees {
- kind, frags, err := frag.Describe()
- if err != nil {
- return nil, err, nil
- }
-
- cerrs = append(cerrs, &CompileError{
- Kind: kind,
- Fragment: true,
- Cause: fmt.Errorf("fragment contains undefined fragments or cycles"),
- Detail: fmt.Sprintf("%v", frags),
- })
- }
-
- return nil, fmt.Errorf("compile error"), cerrs
- }
-
- return nil, err, nil
- }
- }
-
- cpTrees := map[spec.LexModeKindID]psr.CPTree{}
- {
- pats := make([]*psr.PatternEntry, len(patterns)+1)
- pats[spec.LexModeKindIDNil] = &psr.PatternEntry{
- ID: spec.LexModeKindIDNil,
- }
- for id, pattern := range patterns {
- pats[id] = &psr.PatternEntry{
- ID: id,
- Pattern: pattern,
- }
- }
-
- var cerrs []*CompileError
- for _, pat := range pats {
- if pat.ID == spec.LexModeKindIDNil {
- continue
- }
-
- p := psr.NewParser(kindIDToName[pat.ID], bytes.NewReader(pat.Pattern))
- t, err := p.Parse()
- if err != nil {
- if err == psr.ParseErr {
- detail, cause := p.Error()
- cerrs = append(cerrs, &CompileError{
- Kind: kindIDToName[pat.ID],
- Fragment: false,
- Cause: cause,
- Detail: detail,
- })
- } else {
- cerrs = append(cerrs, &CompileError{
- Kind: kindIDToName[pat.ID],
- Fragment: false,
- Cause: err,
- })
- }
- continue
- }
-
- complete, err := psr.ApplyFragments(t, fragmentCPTrees)
- if err != nil {
- return nil, err, nil
- }
- if !complete {
- _, frags, err := t.Describe()
- if err != nil {
- return nil, err, nil
- }
-
- cerrs = append(cerrs, &CompileError{
- Kind: kindIDToName[pat.ID],
- Fragment: false,
- Cause: fmt.Errorf("pattern contains undefined fragments"),
- Detail: fmt.Sprintf("%v", frags),
- })
- continue
- }
-
- cpTrees[pat.ID] = t
- }
- if len(cerrs) > 0 {
- return nil, fmt.Errorf("compile error"), cerrs
- }
- }
-
- var tranTab *spec.TransitionTable
- {
- root, symTab, err := dfa.ConvertCPTreeToByteTree(cpTrees)
- if err != nil {
- return nil, err, nil
- }
- d := dfa.GenDFA(root, symTab)
- tranTab, err = dfa.GenTransitionTable(d)
- if err != nil {
- return nil, err, nil
- }
- }
-
- var err error
- switch compLv {
- case 2:
- tranTab, err = compressTransitionTableLv2(tranTab)
- if err != nil {
- return nil, err, nil
- }
- case 1:
- tranTab, err = compressTransitionTableLv1(tranTab)
- if err != nil {
- return nil, err, nil
- }
- }
-
- return &spec.CompiledLexModeSpec{
- KindNames: kindNames,
- Push: push,
- Pop: pop,
- DFA: tranTab,
- }, nil, nil
-}
-
-const (
- CompressionLevelMin = 0
- CompressionLevelMax = 2
-)
-
-func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
- ueTab := compressor.NewUniqueEntriesTable()
- {
- orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
- if err != nil {
- return nil, err
- }
- err = ueTab.Compress(orig)
- if err != nil {
- return nil, err
- }
- }
-
- rdTab := compressor.NewRowDisplacementTable(0)
- {
- orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount)
- if err != nil {
- return nil, err
- }
- err = rdTab.Compress(orig)
- if err != nil {
- return nil, err
- }
- }
-
- tranTab.Transition = &spec.UniqueEntriesTable{
- UniqueEntries: &spec.RowDisplacementTable{
- OriginalRowCount: rdTab.OriginalRowCount,
- OriginalColCount: rdTab.OriginalColCount,
- EmptyValue: spec.StateIDNil,
- Entries: convertIntSliceToStateIDSlice(rdTab.Entries),
- Bounds: rdTab.Bounds,
- RowDisplacement: rdTab.RowDisplacement,
- },
- RowNums: ueTab.RowNums,
- OriginalRowCount: ueTab.OriginalRowCount,
- OriginalColCount: ueTab.OriginalColCount,
- }
- tranTab.UncompressedTransition = nil
-
- return tranTab, nil
-}
-
-func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
- ueTab := compressor.NewUniqueEntriesTable()
- {
- orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
- if err != nil {
- return nil, err
- }
- err = ueTab.Compress(orig)
- if err != nil {
- return nil, err
- }
- }
-
- tranTab.Transition = &spec.UniqueEntriesTable{
- UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries),
- RowNums: ueTab.RowNums,
- OriginalRowCount: ueTab.OriginalRowCount,
- OriginalColCount: ueTab.OriginalColCount,
- }
- tranTab.UncompressedTransition = nil
-
- return tranTab, nil
-}
-
-func convertStateIDSliceToIntSlice(s []spec.StateID) []int {
- is := make([]int, len(s))
- for i, v := range s {
- is[i] = v.Int()
- }
- return is
-}
-
-func convertIntSliceToStateIDSlice(s []int) []spec.StateID {
- ss := make([]spec.StateID, len(s))
- for i, v := range s {
- ss[i] = spec.StateID(v)
- }
- return ss
-}
diff --git a/grammar/lexical/compiler_test.go b/grammar/lexical/compiler_test.go
deleted file mode 100644
index 3336048..0000000
--- a/grammar/lexical/compiler_test.go
+++ /dev/null
@@ -1,338 +0,0 @@
-package lexical
-
-import (
- "encoding/json"
- "fmt"
- "testing"
-
- spec "spec/grammar"
-)
-
-func TestLexSpec_Validate(t *testing.T) {
- // We expect that the spelling inconsistency error will occur.
- spec := &LexSpec{
- Entries: []*LexEntry{
- {
- Modes: []spec.LexModeName{
- // 'Default' is the spelling inconsistency because 'default' is predefined.
- "Default",
- },
- Kind: "foo",
- Pattern: "foo",
- },
- },
- }
- err := spec.Validate()
- if err == nil {
- t.Fatalf("expected error didn't occur")
- }
-}
-
-func TestSnakeCaseToUpperCamelCase(t *testing.T) {
- tests := []struct {
- snake string
- camel string
- }{
- {
- snake: "foo",
- camel: "Foo",
- },
- {
- snake: "foo_bar",
- camel: "FooBar",
- },
- {
- snake: "foo_bar_baz",
- camel: "FooBarBaz",
- },
- {
- snake: "Foo",
- camel: "Foo",
- },
- {
- snake: "fooBar",
- camel: "FooBar",
- },
- {
- snake: "FOO",
- camel: "FOO",
- },
- {
- snake: "FOO_BAR",
- camel: "FOOBAR",
- },
- {
- snake: "_foo_bar_",
- camel: "FooBar",
- },
- {
- snake: "___foo___bar___",
- camel: "FooBar",
- },
- }
- for _, tt := range tests {
- c := SnakeCaseToUpperCamelCase(tt.snake)
- if c != tt.camel {
- t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
- }
- }
-}
-
// TestFindSpellingInconsistencies checks that FindSpellingInconsistencies
// reports groups of identifiers that are spelled differently but collide
// after normalization (e.g. "foo" vs "Foo"), and that exact duplicates
// alone are not reported.
func TestFindSpellingInconsistencies(t *testing.T) {
	tests := []struct {
		ids        []string
		duplicated [][]string
	}{
		{
			ids:        []string{"foo", "foo"},
			duplicated: nil,
		},
		{
			ids:        []string{"foo", "Foo"},
			duplicated: [][]string{{"Foo", "foo"}},
		},
		{
			ids:        []string{"foo", "foo", "Foo"},
			duplicated: [][]string{{"Foo", "foo"}},
		},
		{
			ids:        []string{"foo_bar_baz", "FooBarBaz"},
			duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}},
		},
		{
			ids:        []string{"foo", "Foo", "bar", "Bar"},
			duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
		},
		{
			ids:        []string{"foo", "Foo", "bar", "Bar", "baz", "bra"},
			duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
			duplicated := FindSpellingInconsistencies(tt.ids)
			// Compare the number of groups, then each group's size and
			// members; expected groups are listed in sorted order.
			if len(duplicated) != len(tt.duplicated) {
				t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated)
			}
			for i, dupIDs := range duplicated {
				if len(dupIDs) != len(tt.duplicated[i]) {
					t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
				}
				for j, id := range dupIDs {
					if id != tt.duplicated[i][j] {
						t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
					}
				}
			}
		})
	}
}
-
-func TestCompile(t *testing.T) {
- tests := []struct {
- Caption string
- Spec string
- Err bool
- }{
- {
- Caption: "allow duplicates names between fragments and non-fragments",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "kind": "a2z",
- "pattern": "\\f{a2z}"
- },
- {
- "fragment": true,
- "kind": "a2z",
- "pattern": "[a-z]"
- }
- ]
-}
-`,
- },
- {
- Caption: "don't allow duplicates names in non-fragments",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "kind": "a2z",
- "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
- },
- {
- "kind": "a2z",
- "pattern": "[a-z]"
- }
- ]
-}
-`,
- Err: true,
- },
- {
- Caption: "don't allow duplicates names in fragments",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "kind": "a2z",
- "pattern": "\\f{a2z}"
- },
- {
- "fragments": true,
- "kind": "a2z",
- "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z"
- },
- {
- "fragments": true,
- "kind": "a2z",
- "pattern": "[a-z]"
- }
- ]
-}
-`,
- Err: true,
- },
- {
- Caption: "don't allow kind names in the same mode to contain spelling inconsistencies",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "kind": "foo_1",
- "pattern": "foo_1"
- },
- {
- "kind": "foo1",
- "pattern": "foo1"
- }
- ]
-}
-`,
- Err: true,
- },
- {
- Caption: "don't allow kind names across modes to contain spelling inconsistencies",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "modes": ["default"],
- "kind": "foo_1",
- "pattern": "foo_1"
- },
- {
- "modes": ["other_mode"],
- "kind": "foo1",
- "pattern": "foo1"
- }
- ]
-}
-`,
- Err: true,
- },
- {
- Caption: "don't allow mode names to contain spelling inconsistencies",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "modes": ["foo_1"],
- "kind": "a",
- "pattern": "a"
- },
- {
- "modes": ["foo1"],
- "kind": "b",
- "pattern": "b"
- }
- ]
-}
-`,
- Err: true,
- },
- {
- Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "kind": "a",
- "pattern": "a"
- },
- {
- "fragment": true,
- "kind": "foo_1",
- "pattern": "foo_1"
- },
- {
- "fragment": true,
- "kind": "foo1",
- "pattern": "foo1"
- }
- ]
-}
-`,
- },
- {
- Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files",
- Spec: `
-{
- "name": "test",
- "entries": [
- {
- "modes": ["default"],
- "kind": "a",
- "pattern": "a"
- },
- {
- "modes": ["default"],
- "fragment": true,
- "kind": "foo_1",
- "pattern": "foo_1"
- },
- {
- "modes": ["other_mode"],
- "fragment": true,
- "kind": "foo1",
- "pattern": "foo1"
- }
- ]
-}
-`,
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) {
- lspec := &LexSpec{}
- err := json.Unmarshal([]byte(tt.Spec), lspec)
- if err != nil {
- t.Fatalf("%v", err)
- }
- clspec, err, _ := Compile(lspec, CompressionLevelMin)
- if tt.Err {
- if err == nil {
- t.Fatalf("expected an error")
- }
- if clspec != nil {
- t.Fatalf("Compile function mustn't return a compiled specification")
- }
- } else {
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- if clspec == nil {
- t.Fatalf("Compile function must return a compiled specification")
- }
- }
- })
- }
-}
diff --git a/grammar/lexical/dfa/dfa.go b/grammar/lexical/dfa/dfa.go
deleted file mode 100644
index 884b168..0000000
--- a/grammar/lexical/dfa/dfa.go
+++ /dev/null
@@ -1,173 +0,0 @@
-package dfa
-
-import (
- "sort"
-
- spec "spec/grammar"
-)
-
// symbolTable maps the symbol positions assigned to the leaves of a
// byte tree back to what the DFA construction needs:
//   - symPos2Byte: the byte range a (non end-mark) position matches.
//   - endPos2ID:   the lex-mode kind ID carried by an end-mark position.
type symbolTable struct {
	symPos2Byte map[symbolPosition]byteRange
	endPos2ID   map[symbolPosition]spec.LexModeKindID
}
-
-func genSymbolTable(root byteTree) *symbolTable {
- symTab := &symbolTable{
- symPos2Byte: map[symbolPosition]byteRange{},
- endPos2ID: map[symbolPosition]spec.LexModeKindID{},
- }
- return genSymTab(symTab, root)
-}
-
-func genSymTab(symTab *symbolTable, node byteTree) *symbolTable {
- if node == nil {
- return symTab
- }
-
- switch n := node.(type) {
- case *symbolNode:
- symTab.symPos2Byte[n.pos] = byteRange{
- from: n.from,
- to: n.to,
- }
- case *endMarkerNode:
- symTab.endPos2ID[n.pos] = n.id
- default:
- left, right := node.children()
- genSymTab(symTab, left)
- genSymTab(symTab, right)
- }
- return symTab
-}
-
// DFA is a deterministic finite automaton over byte values. States are
// identified by the hash string of their symbol-position set; States
// lists them in sorted order for determinism.
type DFA struct {
	States               []string
	InitialState         string
	AcceptingStatesTable map[string]spec.LexModeKindID
	TransitionTable      map[string][256]string
}
-
// GenDFA builds a DFA from a positioned byte tree using the classic
// followpos construction: each DFA state is a set of symbol positions,
// keyed by the set's hash string.
func GenDFA(root byteTree, symTab *symbolTable) *DFA {
	initialState := root.first()
	initialStateHash := initialState.hash()
	stateMap := map[string]*symbolPositionSet{
		initialStateHash: initialState,
	}
	tranTab := map[string][256]string{}
	{
		follow := genFollowTable(root)
		// Worklist loop: process unmarked states until no new state set
		// is discovered.
		unmarkedStates := map[string]*symbolPositionSet{
			initialStateHash: initialState,
		}
		for len(unmarkedStates) > 0 {
			nextUnmarkedStates := map[string]*symbolPositionSet{}
			for hash, state := range unmarkedStates {
				// For each byte value, the successor is the union of the
				// follow sets of all positions that can match that byte.
				tranTabOfState := [256]*symbolPositionSet{}
				for _, pos := range state.set() {
					if pos.isEndMark() {
						continue
					}
					valRange := symTab.symPos2Byte[pos]
					for symVal := valRange.from; symVal <= valRange.to; symVal++ {
						if tranTabOfState[symVal] == nil {
							tranTabOfState[symVal] = newSymbolPositionSet()
						}
						tranTabOfState[symVal].merge(follow[pos])
					}
				}
				// Register successor sets not seen before as new states.
				for _, t := range tranTabOfState {
					if t == nil {
						continue
					}
					h := t.hash()
					if _, ok := stateMap[h]; ok {
						continue
					}
					stateMap[h] = t
					nextUnmarkedStates[h] = t
				}
				// Record this state's row of the transition table as hashes.
				tabOfState := [256]string{}
				for v, t := range tranTabOfState {
					if t == nil {
						continue
					}
					tabOfState[v] = t.hash()
				}
				tranTab[hash] = tabOfState
			}
			unmarkedStates = nextUnmarkedStates
		}
	}

	// A state accepts when it contains an end-mark position; if several
	// end marks land in one state, the smallest kind ID (the
	// highest-priority entry) wins.
	accTab := map[string]spec.LexModeKindID{}
	{
		for h, s := range stateMap {
			for _, pos := range s.set() {
				if !pos.isEndMark() {
					continue
				}
				priorID, ok := accTab[h]
				if !ok {
					accTab[h] = symTab.endPos2ID[pos]
				} else {
					id := symTab.endPos2ID[pos]
					if id < priorID {
						accTab[h] = id
					}
				}
			}
		}
	}

	// Sort the state hashes so the DFA's state list is deterministic.
	var states []string
	{
		for s := range stateMap {
			states = append(states, s)
		}
		sort.Slice(states, func(i, j int) bool {
			return states[i] < states[j]
		})
	}

	return &DFA{
		States:               states,
		InitialState:         initialStateHash,
		AcceptingStatesTable: accTab,
		TransitionTable:      tranTab,
	}
}
-
-func GenTransitionTable(dfa *DFA) (*spec.TransitionTable, error) {
- stateHash2ID := map[string]spec.StateID{}
- for i, s := range dfa.States {
- // Since 0 represents an invalid value in a transition table,
- // assign a number greater than or equal to 1 to states.
- stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int())
- }
-
- acc := make([]spec.LexModeKindID, len(dfa.States)+1)
- for _, s := range dfa.States {
- id, ok := dfa.AcceptingStatesTable[s]
- if !ok {
- continue
- }
- acc[stateHash2ID[s]] = id
- }
-
- rowCount := len(dfa.States) + 1
- colCount := 256
- tran := make([]spec.StateID, rowCount*colCount)
- for s, tab := range dfa.TransitionTable {
- for v, to := range tab {
- tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to]
- }
- }
-
- return &spec.TransitionTable{
- InitialStateID: stateHash2ID[dfa.InitialState],
- AcceptingStates: acc,
- UncompressedTransition: tran,
- RowCount: rowCount,
- ColCount: colCount,
- }, nil
-}
diff --git a/grammar/lexical/dfa/dfa_test.go b/grammar/lexical/dfa/dfa_test.go
deleted file mode 100644
index 9af9aeb..0000000
--- a/grammar/lexical/dfa/dfa_test.go
+++ /dev/null
@@ -1,121 +0,0 @@
-package dfa
-
-import (
- "strings"
- "testing"
-
- "grammar/lexical/parser"
- spec "spec/grammar"
-)
-
-func TestGenDFA(t *testing.T) {
- p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
- cpt, err := p.Parse()
- if err != nil {
- t.Fatal(err)
- }
- bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
- spec.LexModeKindIDMin: cpt,
- })
- if err != nil {
- t.Fatal(err)
- }
- dfa := GenDFA(bt, symTab)
- if dfa == nil {
- t.Fatalf("DFA is nil")
- }
-
- symPos := func(n uint16) symbolPosition {
- pos, err := newSymbolPosition(n, false)
- if err != nil {
- panic(err)
- }
- return pos
- }
-
- endPos := func(n uint16) symbolPosition {
- pos, err := newSymbolPosition(n, true)
- if err != nil {
- panic(err)
- }
- return pos
- }
-
- s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
- s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
- s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
- s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
-
- rune2Int := func(char rune, index int) uint8 {
- return uint8([]byte(string(char))[index])
- }
-
- tranS0 := [256]string{}
- tranS0[rune2Int('a', 0)] = s1.hash()
- tranS0[rune2Int('b', 0)] = s0.hash()
-
- tranS1 := [256]string{}
- tranS1[rune2Int('a', 0)] = s1.hash()
- tranS1[rune2Int('b', 0)] = s2.hash()
-
- tranS2 := [256]string{}
- tranS2[rune2Int('a', 0)] = s1.hash()
- tranS2[rune2Int('b', 0)] = s3.hash()
-
- tranS3 := [256]string{}
- tranS3[rune2Int('a', 0)] = s1.hash()
- tranS3[rune2Int('b', 0)] = s0.hash()
-
- expectedTranTab := map[string][256]string{
- s0.hash(): tranS0,
- s1.hash(): tranS1,
- s2.hash(): tranS2,
- s3.hash(): tranS3,
- }
- if len(dfa.TransitionTable) != len(expectedTranTab) {
- t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
- }
- for h, eTranTab := range expectedTranTab {
- tranTab, ok := dfa.TransitionTable[h]
- if !ok {
- t.Errorf("no entry; hash: %v", h)
- continue
- }
- if len(tranTab) != len(eTranTab) {
- t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
- }
- for c, eNext := range eTranTab {
- if eNext == "" {
- continue
- }
-
- next := tranTab[c]
- if next == "" {
- t.Errorf("no enatry: hash: %v, char: %v", h, c)
- }
- if next != eNext {
- t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next)
- }
- }
- }
-
- if dfa.InitialState != s0.hash() {
- t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState)
- }
-
- accTab := map[string]spec.LexModeKindID{
- s3.hash(): 1,
- }
- if len(dfa.AcceptingStatesTable) != len(accTab) {
- t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable))
- }
- for eState, eID := range accTab {
- id, ok := dfa.AcceptingStatesTable[eState]
- if !ok {
- t.Errorf("accepting state is not found: state: %v", eState)
- }
- if id != eID {
- t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id)
- }
- }
-}
diff --git a/grammar/lexical/dfa/symbol_position.go b/grammar/lexical/dfa/symbol_position.go
deleted file mode 100644
index f154251..0000000
--- a/grammar/lexical/dfa/symbol_position.go
+++ /dev/null
@@ -1,182 +0,0 @@
-package dfa
-
-import (
- "encoding/binary"
- "fmt"
- "strings"
-)
-
// symbolPosition packs a leaf position into 16 bits: the low 15 bits
// hold the ordinal (1..0x7fff), and the top bit flags an end mark.
type symbolPosition uint16

const (
	// symbolPositionNil is the zero value; 0 is outside the valid
	// ordinal range, so it doubles as "no position assigned".
	symbolPositionNil symbolPosition = 0x0000

	symbolPositionMin uint16 = 0x0001
	symbolPositionMax uint16 = 0x7fff

	// Tag masks OR-ed onto the ordinal: plain symbols get no bit,
	// end marks get the top bit.
	symbolPositionMaskSymbol  uint16 = 0x0000
	symbolPositionMaskEndMark uint16 = 0x8000

	// symbolPositionMaskValue strips the tag, leaving the ordinal.
	symbolPositionMaskValue uint16 = 0x7fff
)
-
-func newSymbolPosition(n uint16, endMark bool) (symbolPosition, error) {
- if n < symbolPositionMin || n > symbolPositionMax {
- return symbolPositionNil, fmt.Errorf("symbol position must be within %v to %v: n: %v, endMark: %v", symbolPositionMin, symbolPositionMax, n, endMark)
- }
- if endMark {
- return symbolPosition(n | symbolPositionMaskEndMark), nil
- }
- return symbolPosition(n | symbolPositionMaskSymbol), nil
-}
-
-func (p symbolPosition) String() string {
- if p.isEndMark() {
- return fmt.Sprintf("end#%v", uint16(p)&symbolPositionMaskValue)
- }
- return fmt.Sprintf("sym#%v", uint16(p)&symbolPositionMaskValue)
-}
-
-func (p symbolPosition) isEndMark() bool {
- return uint16(p)&symbolPositionMaskEndMark > 1
-}
-
-func (p symbolPosition) describe() (uint16, bool) {
- v := uint16(p) & symbolPositionMaskValue
- if p.isEndMark() {
- return v, true
- }
- return v, false
-}
-
// symbolPositionSet is a set of symbol positions with lazy
// normalization.
type symbolPositionSet struct {
	// `s` represents a set of symbol positions.
	// However, immediately after adding a symbol position, the elements may be duplicated.
	// When you need an aligned set with no duplicates, you can get such value via the set function.
	s []symbolPosition
	// sorted records whether s is currently normalized (ascending, no
	// duplicates); add/merge clear it.
	sorted bool
}
-
-func newSymbolPositionSet() *symbolPositionSet {
- return &symbolPositionSet{
- s: []symbolPosition{},
- sorted: false,
- }
-}
-
-func (s *symbolPositionSet) String() string {
- if len(s.s) <= 0 {
- return "{}"
- }
- ps := s.sortAndRemoveDuplicates()
- var b strings.Builder
- fmt.Fprintf(&b, "{")
- for i, p := range ps {
- if i <= 0 {
- fmt.Fprintf(&b, "%v", p)
- continue
- }
- fmt.Fprintf(&b, ", %v", p)
- }
- fmt.Fprintf(&b, "}")
- return b.String()
-}
-
-func (s *symbolPositionSet) set() []symbolPosition {
- s.sortAndRemoveDuplicates()
- return s.s
-}
-
-func (s *symbolPositionSet) add(pos symbolPosition) *symbolPositionSet {
- s.s = append(s.s, pos)
- s.sorted = false
- return s
-}
-
-func (s *symbolPositionSet) merge(t *symbolPositionSet) *symbolPositionSet {
- s.s = append(s.s, t.s...)
- s.sorted = false
- return s
-}
-
-func (s *symbolPositionSet) hash() string {
- if len(s.s) <= 0 {
- return ""
- }
- sorted := s.sortAndRemoveDuplicates()
- var buf []byte
- for _, p := range sorted {
- b := make([]byte, 8)
- binary.PutUvarint(b, uint64(p))
- buf = append(buf, b...)
- }
- // Convert to a string to be able to use it as a key of a map.
- // But note this byte sequence is made from values of symbol positions,
- // so this is not a well-formed UTF-8 sequence.
- return string(buf)
-}
-
-func (s *symbolPositionSet) sortAndRemoveDuplicates() []symbolPosition {
- if s.sorted {
- return s.s
- }
-
- sortSymbolPositions(s.s, 0, len(s.s)-1)
-
- // Remove duplicates.
- lastV := s.s[0]
- nextIdx := 1
- for _, v := range s.s[1:] {
- if v == lastV {
- continue
- }
- s.s[nextIdx] = v
- nextIdx++
- lastV = v
- }
- s.s = s.s[:nextIdx]
- s.sorted = true
-
- return s.s
-}
-
-// sortSymbolPositions sorts a slice of symbol positions as it uses quick sort.
-func sortSymbolPositions(ps []symbolPosition, left, right int) {
- if left >= right {
- return
- }
- var pivot symbolPosition
- {
- // Use a median as a pivot.
- p1 := ps[left]
- p2 := ps[(left+right)/2]
- p3 := ps[right]
- if p1 > p2 {
- p1, p2 = p2, p1
- }
- if p2 > p3 {
- p2 = p3
- if p1 > p2 {
- p2 = p1
- }
- }
- pivot = p2
- }
- i := left
- j := right
- for i <= j {
- for ps[i] < pivot {
- i++
- }
- for ps[j] > pivot {
- j--
- }
- if i <= j {
- ps[i], ps[j] = ps[j], ps[i]
- i++
- j--
- }
- }
- sortSymbolPositions(ps, left, j)
- sortSymbolPositions(ps, i, right)
-}
diff --git a/grammar/lexical/dfa/symbol_position_test.go b/grammar/lexical/dfa/symbol_position_test.go
deleted file mode 100644
index c867f64..0000000
--- a/grammar/lexical/dfa/symbol_position_test.go
+++ /dev/null
@@ -1,79 +0,0 @@
-package dfa
-
-import (
- "fmt"
- "testing"
-)
-
// TestNewSymbolPosition checks the valid ordinal range of
// newSymbolPosition at and around both boundaries, for both plain
// symbols and end marks, and verifies round-tripping via describe.
func TestNewSymbolPosition(t *testing.T) {
	tests := []struct {
		n       uint16
		endMark bool
		err     bool
	}{
		{
			n:       0,
			endMark: false,
			err:     true,
		},
		{
			n:       0,
			endMark: true,
			err:     true,
		},
		{
			// Note: symbolPositionMin-1 is 0 as well; kept for symmetry
			// with the max+1 cases below.
			n:       symbolPositionMin - 1,
			endMark: false,
			err:     true,
		},
		{
			n:       symbolPositionMin - 1,
			endMark: true,
			err:     true,
		},
		{
			n:       symbolPositionMin,
			endMark: false,
		},
		{
			n:       symbolPositionMin,
			endMark: true,
		},
		{
			n:       symbolPositionMax,
			endMark: false,
		},
		{
			n:       symbolPositionMax,
			endMark: true,
		},
		{
			n:       symbolPositionMax + 1,
			endMark: false,
			err:     true,
		},
		{
			n:       symbolPositionMax + 1,
			endMark: true,
			err:     true,
		},
	}
	for i, tt := range tests {
		t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
			pos, err := newSymbolPosition(tt.n, tt.endMark)
			if tt.err {
				if err == nil {
					t.Fatal("err is nil")
				}
				return
			}
			if err != nil {
				t.Fatal(err)
			}
			// describe must return exactly the inputs that were packed.
			n, endMark := pos.describe()
			if n != tt.n || endMark != tt.endMark {
				t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
			}
		})
	}
}
diff --git a/grammar/lexical/dfa/tree.go b/grammar/lexical/dfa/tree.go
deleted file mode 100644
index 85061f9..0000000
--- a/grammar/lexical/dfa/tree.go
+++ /dev/null
@@ -1,567 +0,0 @@
-package dfa
-
-import (
- "fmt"
- "io"
- "sort"
-
- "grammar/lexical/parser"
- spec "spec/grammar"
- "utf8"
-)
-
// byteTree is a node of a regular-expression syntax tree whose leaves
// match byte ranges. It exposes the attributes the followpos-based DFA
// construction needs: nullability and the memoized first/last position
// sets.
type byteTree interface {
	fmt.Stringer
	children() (byteTree, byteTree)
	nullable() bool
	first() *symbolPositionSet
	last() *symbolPositionSet
	clone() byteTree
}
-
// Compile-time checks that every node type implements byteTree.
var (
	_ byteTree = &symbolNode{}
	_ byteTree = &endMarkerNode{}
	_ byteTree = &concatNode{}
	_ byteTree = &altNode{}
	_ byteTree = &repeatNode{}
	_ byteTree = &optionNode{}
)
-
// byteRange is an inclusive range of byte values [from, to].
type byteRange struct {
	from byte
	to   byte
}
-
// symbolNode is a leaf matching a single byte range. pos is assigned
// later by positionSymbols; first/last results are memoized.
type symbolNode struct {
	byteRange
	pos       symbolPosition
	firstMemo *symbolPositionSet
	lastMemo  *symbolPositionSet
}
-
-func newSymbolNode(value byte) *symbolNode {
- return &symbolNode{
- byteRange: byteRange{
- from: value,
- to: value,
- },
- pos: symbolPositionNil,
- }
-}
-
-func newRangeSymbolNode(from, to byte) *symbolNode {
- return &symbolNode{
- byteRange: byteRange{
- from: from,
- to: to,
- },
- pos: symbolPositionNil,
- }
-}
-
func (n *symbolNode) String() string {
	return fmt.Sprintf("symbol: value: %v-%v, pos: %v", n.from, n.to, n.pos)
}

// children returns no children; a symbol node is a leaf.
func (n *symbolNode) children() (byteTree, byteTree) {
	return nil, nil
}

// nullable is false: a symbol always consumes one byte.
func (n *symbolNode) nullable() bool {
	return false
}

// first returns the singleton set {pos}, memoized.
func (n *symbolNode) first() *symbolPositionSet {
	if n.firstMemo == nil {
		n.firstMemo = newSymbolPositionSet()
		n.firstMemo.add(n.pos)
	}
	return n.firstMemo
}

// last returns the singleton set {pos}, memoized.
func (n *symbolNode) last() *symbolPositionSet {
	if n.lastMemo == nil {
		n.lastMemo = newSymbolPositionSet()
		n.lastMemo.add(n.pos)
	}
	return n.lastMemo
}

// clone copies the byte range only; the position is reset to nil.
// NOTE(review): this appears intentional — positions are reassigned by
// positionSymbols after cloning — confirm callers rely on that.
func (n *symbolNode) clone() byteTree {
	return newRangeSymbolNode(n.from, n.to)
}
-
// endMarkerNode is a leaf marking the accepting end of the pattern for
// one lex-mode kind; its position becomes an accepting position in the
// DFA.
type endMarkerNode struct {
	id        spec.LexModeKindID
	pos       symbolPosition
	firstMemo *symbolPositionSet
	lastMemo  *symbolPositionSet
}

// newEndMarkerNode returns an end marker for the given kind ID with an
// unassigned position.
func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode {
	return &endMarkerNode{
		id:  id,
		pos: symbolPositionNil,
	}
}

func (n *endMarkerNode) String() string {
	return fmt.Sprintf("end: pos: %v", n.pos)
}

// children returns no children; an end marker is a leaf.
func (n *endMarkerNode) children() (byteTree, byteTree) {
	return nil, nil
}

// nullable is false, matching the followpos construction's treatment
// of the end marker as a one-position symbol.
func (n *endMarkerNode) nullable() bool {
	return false
}

// first returns the singleton set {pos}, memoized.
func (n *endMarkerNode) first() *symbolPositionSet {
	if n.firstMemo == nil {
		n.firstMemo = newSymbolPositionSet()
		n.firstMemo.add(n.pos)
	}
	return n.firstMemo
}

// last returns the singleton set {pos}, memoized.
func (n *endMarkerNode) last() *symbolPositionSet {
	if n.lastMemo == nil {
		n.lastMemo = newSymbolPositionSet()
		n.lastMemo.add(n.pos)
	}
	return n.lastMemo
}

// clone copies the kind ID; the position is reset to nil (positions
// are reassigned after cloning).
func (n *endMarkerNode) clone() byteTree {
	return newEndMarkerNode(n.id)
}
-
// concatNode matches left followed by right.
type concatNode struct {
	left      byteTree
	right     byteTree
	firstMemo *symbolPositionSet
	lastMemo  *symbolPositionSet
}

func newConcatNode(left, right byteTree) *concatNode {
	return &concatNode{
		left:  left,
		right: right,
	}
}

func (n *concatNode) String() string {
	return "concat"
}

func (n *concatNode) children() (byteTree, byteTree) {
	return n.left, n.right
}

// nullable: a concatenation can match empty only if both sides can.
func (n *concatNode) nullable() bool {
	return n.left.nullable() && n.right.nullable()
}

// first is first(left), plus first(right) when left is nullable.
func (n *concatNode) first() *symbolPositionSet {
	if n.firstMemo == nil {
		n.firstMemo = newSymbolPositionSet()
		n.firstMemo.merge(n.left.first())
		if n.left.nullable() {
			n.firstMemo.merge(n.right.first())
		}
		n.firstMemo.sortAndRemoveDuplicates()
	}
	return n.firstMemo
}

// last is last(right), plus last(left) when right is nullable.
func (n *concatNode) last() *symbolPositionSet {
	if n.lastMemo == nil {
		n.lastMemo = newSymbolPositionSet()
		n.lastMemo.merge(n.right.last())
		if n.right.nullable() {
			n.lastMemo.merge(n.left.last())
		}
		n.lastMemo.sortAndRemoveDuplicates()
	}
	return n.lastMemo
}

// clone deep-copies both children (positions and memos are dropped).
func (n *concatNode) clone() byteTree {
	return newConcatNode(n.left.clone(), n.right.clone())
}
-
// altNode matches either left or right (alternation).
type altNode struct {
	left      byteTree
	right     byteTree
	firstMemo *symbolPositionSet
	lastMemo  *symbolPositionSet
}

func newAltNode(left, right byteTree) *altNode {
	return &altNode{
		left:  left,
		right: right,
	}
}

func (n *altNode) String() string {
	return "alt"
}

func (n *altNode) children() (byteTree, byteTree) {
	return n.left, n.right
}

// nullable: an alternation can match empty if either side can.
func (n *altNode) nullable() bool {
	return n.left.nullable() || n.right.nullable()
}

// first is the union of both children's first sets.
func (n *altNode) first() *symbolPositionSet {
	if n.firstMemo == nil {
		n.firstMemo = newSymbolPositionSet()
		n.firstMemo.merge(n.left.first())
		n.firstMemo.merge(n.right.first())
		n.firstMemo.sortAndRemoveDuplicates()
	}
	return n.firstMemo
}

// last is the union of both children's last sets.
func (n *altNode) last() *symbolPositionSet {
	if n.lastMemo == nil {
		n.lastMemo = newSymbolPositionSet()
		n.lastMemo.merge(n.left.last())
		n.lastMemo.merge(n.right.last())
		n.lastMemo.sortAndRemoveDuplicates()
	}
	return n.lastMemo
}

// clone deep-copies both children (positions and memos are dropped).
func (n *altNode) clone() byteTree {
	return newAltNode(n.left.clone(), n.right.clone())
}
-
// repeatNode matches zero or more repetitions of its child (Kleene star).
type repeatNode struct {
	left      byteTree
	firstMemo *symbolPositionSet
	lastMemo  *symbolPositionSet
}

func newRepeatNode(left byteTree) *repeatNode {
	return &repeatNode{
		left: left,
	}
}

func (n *repeatNode) String() string {
	return "repeat"
}

// children returns the single child in the left slot; right is nil.
func (n *repeatNode) children() (byteTree, byteTree) {
	return n.left, nil
}

// nullable is always true: zero repetitions match the empty string.
func (n *repeatNode) nullable() bool {
	return true
}

// first is the child's first set, memoized.
func (n *repeatNode) first() *symbolPositionSet {
	if n.firstMemo == nil {
		n.firstMemo = newSymbolPositionSet()
		n.firstMemo.merge(n.left.first())
		n.firstMemo.sortAndRemoveDuplicates()
	}
	return n.firstMemo
}

// last is the child's last set, memoized.
func (n *repeatNode) last() *symbolPositionSet {
	if n.lastMemo == nil {
		n.lastMemo = newSymbolPositionSet()
		n.lastMemo.merge(n.left.last())
		n.lastMemo.sortAndRemoveDuplicates()
	}
	return n.lastMemo
}

// clone deep-copies the child (positions and memos are dropped).
func (n *repeatNode) clone() byteTree {
	return newRepeatNode(n.left.clone())
}
-
// optionNode matches zero or one occurrence of its child ('?').
type optionNode struct {
	left      byteTree
	firstMemo *symbolPositionSet
	lastMemo  *symbolPositionSet
}

func newOptionNode(left byteTree) *optionNode {
	return &optionNode{
		left: left,
	}
}

func (n *optionNode) String() string {
	return "option"
}

// children returns the single child in the left slot; right is nil.
func (n *optionNode) children() (byteTree, byteTree) {
	return n.left, nil
}

// nullable is always true: zero occurrences match the empty string.
func (n *optionNode) nullable() bool {
	return true
}

// first is the child's first set, memoized.
func (n *optionNode) first() *symbolPositionSet {
	if n.firstMemo == nil {
		n.firstMemo = newSymbolPositionSet()
		n.firstMemo.merge(n.left.first())
		n.firstMemo.sortAndRemoveDuplicates()
	}
	return n.firstMemo
}

// last is the child's last set, memoized.
func (n *optionNode) last() *symbolPositionSet {
	if n.lastMemo == nil {
		n.lastMemo = newSymbolPositionSet()
		n.lastMemo.merge(n.left.last())
		n.lastMemo.sortAndRemoveDuplicates()
	}
	return n.lastMemo
}

// clone deep-copies the child (positions and memos are dropped).
func (n *optionNode) clone() byteTree {
	return newOptionNode(n.left.clone())
}
-
// followTable maps each symbol position to the set of positions that
// can follow it in some matching input.
type followTable map[symbolPosition]*symbolPositionSet

// genFollowTable computes the follow sets for every position in the
// tree rooted at root.
func genFollowTable(root byteTree) followTable {
	follow := followTable{}
	calcFollow(follow, root)
	return follow
}

// calcFollow walks the tree bottom-up applying the two followpos rules:
//   - concat: every position in first(right) follows each position in
//     last(left).
//   - repeat: every position in first(n) follows each position in
//     last(n).
func calcFollow(follow followTable, ast byteTree) {
	if ast == nil {
		return
	}
	left, right := ast.children()
	calcFollow(follow, left)
	calcFollow(follow, right)
	switch n := ast.(type) {
	case *concatNode:
		l, r := n.children()
		for _, p := range l.last().set() {
			if _, ok := follow[p]; !ok {
				follow[p] = newSymbolPositionSet()
			}
			follow[p].merge(r.first())
		}
	case *repeatNode:
		for _, p := range n.last().set() {
			if _, ok := follow[p]; !ok {
				follow[p] = newSymbolPositionSet()
			}
			follow[p].merge(n.first())
		}
	}
}
-
// positionSymbols assigns consecutive positions, starting at n in
// left-to-right leaf order, to every symbol and end-marker leaf of the
// tree, then touches first/last so the memoized sets are computed. It
// returns the next unused position number, and fails if a position
// would fall outside the valid symbolPosition range.
func positionSymbols(node byteTree, n uint16) (uint16, error) {
	if node == nil {
		return n, nil
	}

	// Number the children first so leaves are visited left-to-right.
	l, r := node.children()
	p := n
	p, err := positionSymbols(l, p)
	if err != nil {
		return p, err
	}
	p, err = positionSymbols(r, p)
	if err != nil {
		return p, err
	}
	switch n := node.(type) {
	case *symbolNode:
		n.pos, err = newSymbolPosition(p, false)
		if err != nil {
			return p, err
		}
		p++
	case *endMarkerNode:
		n.pos, err = newSymbolPosition(p, true)
		if err != nil {
			return p, err
		}
		p++
	}
	// Force memoization now that this subtree's positions are final.
	node.first()
	node.last()
	return p, nil
}
-
-func concat(ts ...byteTree) byteTree {
- nonNilNodes := []byteTree{}
- for _, t := range ts {
- if t == nil {
- continue
- }
- nonNilNodes = append(nonNilNodes, t)
- }
- if len(nonNilNodes) <= 0 {
- return nil
- }
- if len(nonNilNodes) == 1 {
- return nonNilNodes[0]
- }
- concat := newConcatNode(nonNilNodes[0], nonNilNodes[1])
- for _, t := range nonNilNodes[2:] {
- concat = newConcatNode(concat, t)
- }
- return concat
-}
-
-func oneOf(ts ...byteTree) byteTree {
- nonNilNodes := []byteTree{}
- for _, t := range ts {
- if t == nil {
- continue
- }
- nonNilNodes = append(nonNilNodes, t)
- }
- if len(nonNilNodes) <= 0 {
- return nil
- }
- if len(nonNilNodes) == 1 {
- return nonNilNodes[0]
- }
- alt := newAltNode(nonNilNodes[0], nonNilNodes[1])
- for _, t := range nonNilNodes[2:] {
- alt = newAltNode(alt, t)
- }
- return alt
-}
-
// printByteTree pretty-prints the tree rooted at t to w as an ASCII
// tree, one node per line. ruledLine is the prefix for t's own line;
// childRuledLinePrefix is prepended to the branch characters of its
// children. When withAttrs is true, each node's nullable/first/last
// attributes are appended. Debug helper; kept despite being unused.
//
//nolint:unused
func printByteTree(w io.Writer, t byteTree, ruledLine string, childRuledLinePrefix string, withAttrs bool) {
	if t == nil {
		return
	}
	fmt.Fprintf(w, "%v%v", ruledLine, t)
	if withAttrs {
		fmt.Fprintf(w, ", nullable: %v, first: %v, last: %v", t.nullable(), t.first(), t.last())
	}
	fmt.Fprintf(w, "\n")
	left, right := t.children()
	children := []byteTree{}
	if left != nil {
		children = append(children, left)
	}
	if right != nil {
		children = append(children, right)
	}
	num := len(children)
	for i, child := range children {
		// Choose the branch glyph: last child closes the branch, earlier
		// children keep the vertical rule running.
		line := "└─ "
		if num > 1 {
			if i == 0 {
				line = "├─ "
			} else if i < num-1 {
				line = "│  "
			}
		}
		prefix := "│  "
		if i >= num-1 {
			prefix = "   "
		}
		printByteTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix, withAttrs)
	}
}
-
// ConvertCPTreeToByteTree combines the code-point trees of all
// lex-mode kinds into one byte tree: each kind's tree is concatenated
// with an end marker carrying its ID, the results are OR-ed together
// in ascending kind-ID order (for determinism), and symbol positions
// are assigned. It also returns the symbol table for the combined tree.
func ConvertCPTreeToByteTree(cpTrees map[spec.LexModeKindID]parser.CPTree) (byteTree, *symbolTable, error) {
	// Sort the kind IDs so the alternation order, and hence position
	// numbering, is deterministic regardless of map iteration order.
	var ids []spec.LexModeKindID
	for id := range cpTrees {
		ids = append(ids, id)
	}
	sort.Slice(ids, func(i, j int) bool {
		return ids[i] < ids[j]
	})

	var bt byteTree
	for _, id := range ids {
		cpTree := cpTrees[id]
		t, err := convCPTreeToByteTree(cpTree)
		if err != nil {
			return nil, nil, err
		}
		bt = oneOf(bt, concat(t, newEndMarkerNode(id)))
	}
	_, err := positionSymbols(bt, symbolPositionMin)
	if err != nil {
		return nil, nil, err
	}

	return bt, genSymbolTable(bt), nil
}
-
// convCPTreeToByteTree lowers one code-point tree to a byte tree.
// A code-point range is expanded via utf8.GenCharBlocks into an
// alternation of per-byte-position range concatenations; the other
// node kinds map structurally (repeatable -> repeat, optional ->
// option, concatenation -> concat, alternatives -> alt). An
// unrecognized node type is an error.
func convCPTreeToByteTree(cpTree parser.CPTree) (byteTree, error) {
	if from, to, ok := cpTree.Range(); ok {
		bs, err := utf8.GenCharBlocks(from, to)
		if err != nil {
			return nil, err
		}
		// Each block covers UTF-8 encodings of one length; concatenate
		// a byte-range node per encoded byte, then OR the blocks.
		var a byteTree
		for _, b := range bs {
			var c byteTree
			for i := 0; i < len(b.From); i++ {
				c = concat(c, newRangeSymbolNode(b.From[i], b.To[i]))
			}
			a = oneOf(a, c)
		}
		return a, nil
	}

	if tree, ok := cpTree.Repeatable(); ok {
		t, err := convCPTreeToByteTree(tree)
		if err != nil {
			return nil, err
		}
		return newRepeatNode(t), nil
	}

	if tree, ok := cpTree.Optional(); ok {
		t, err := convCPTreeToByteTree(tree)
		if err != nil {
			return nil, err
		}
		return newOptionNode(t), nil
	}

	if left, right, ok := cpTree.Concatenation(); ok {
		l, err := convCPTreeToByteTree(left)
		if err != nil {
			return nil, err
		}
		r, err := convCPTreeToByteTree(right)
		if err != nil {
			return nil, err
		}
		return newConcatNode(l, r), nil
	}

	if left, right, ok := cpTree.Alternatives(); ok {
		l, err := convCPTreeToByteTree(left)
		if err != nil {
			return nil, err
		}
		r, err := convCPTreeToByteTree(right)
		if err != nil {
			return nil, err
		}
		return newAltNode(l, r), nil
	}

	return nil, fmt.Errorf("invalid tree type: %T", cpTree)
}
diff --git a/grammar/lexical/dfa/tree_test.go b/grammar/lexical/dfa/tree_test.go
deleted file mode 100644
index 188fe95..0000000
--- a/grammar/lexical/dfa/tree_test.go
+++ /dev/null
@@ -1,257 +0,0 @@
-package dfa
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "grammar/lexical/parser"
- spec "spec/grammar"
-)
-
-func TestByteTree(t *testing.T) {
- tests := []struct {
- root byteTree
- nullable bool
- first *symbolPositionSet
- last *symbolPositionSet
- }{
- {
- root: newSymbolNodeWithPos(0, 1),
- nullable: false,
- first: newSymbolPositionSet().add(1),
- last: newSymbolPositionSet().add(1),
- },
- {
- root: newEndMarkerNodeWithPos(1, 1),
- nullable: false,
- first: newSymbolPositionSet().add(1),
- last: newSymbolPositionSet().add(1),
- },
- {
- root: newConcatNode(
- newSymbolNodeWithPos(0, 1),
- newSymbolNodeWithPos(0, 2),
- ),
- nullable: false,
- first: newSymbolPositionSet().add(1),
- last: newSymbolPositionSet().add(2),
- },
- {
- root: newConcatNode(
- newRepeatNode(newSymbolNodeWithPos(0, 1)),
- newSymbolNodeWithPos(0, 2),
- ),
- nullable: false,
- first: newSymbolPositionSet().add(1).add(2),
- last: newSymbolPositionSet().add(2),
- },
- {
- root: newConcatNode(
- newSymbolNodeWithPos(0, 1),
- newRepeatNode(newSymbolNodeWithPos(0, 2)),
- ),
- nullable: false,
- first: newSymbolPositionSet().add(1),
- last: newSymbolPositionSet().add(1).add(2),
- },
- {
- root: newConcatNode(
- newRepeatNode(newSymbolNodeWithPos(0, 1)),
- newRepeatNode(newSymbolNodeWithPos(0, 2)),
- ),
- nullable: true,
- first: newSymbolPositionSet().add(1).add(2),
- last: newSymbolPositionSet().add(1).add(2),
- },
- {
- root: newAltNode(
- newSymbolNodeWithPos(0, 1),
- newSymbolNodeWithPos(0, 2),
- ),
- nullable: false,
- first: newSymbolPositionSet().add(1).add(2),
- last: newSymbolPositionSet().add(1).add(2),
- },
- {
- root: newAltNode(
- newRepeatNode(newSymbolNodeWithPos(0, 1)),
- newSymbolNodeWithPos(0, 2),
- ),
- nullable: true,
- first: newSymbolPositionSet().add(1).add(2),
- last: newSymbolPositionSet().add(1).add(2),
- },
- {
- root: newAltNode(
- newSymbolNodeWithPos(0, 1),
- newRepeatNode(newSymbolNodeWithPos(0, 2)),
- ),
- nullable: true,
- first: newSymbolPositionSet().add(1).add(2),
- last: newSymbolPositionSet().add(1).add(2),
- },
- {
- root: newAltNode(
- newRepeatNode(newSymbolNodeWithPos(0, 1)),
- newRepeatNode(newSymbolNodeWithPos(0, 2)),
- ),
- nullable: true,
- first: newSymbolPositionSet().add(1).add(2),
- last: newSymbolPositionSet().add(1).add(2),
- },
- {
- root: newRepeatNode(newSymbolNodeWithPos(0, 1)),
- nullable: true,
- first: newSymbolPositionSet().add(1),
- last: newSymbolPositionSet().add(1),
- },
- {
- root: newOptionNode(newSymbolNodeWithPos(0, 1)),
- nullable: true,
- first: newSymbolPositionSet().add(1),
- last: newSymbolPositionSet().add(1),
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- if tt.root.nullable() != tt.nullable {
- t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable())
- }
- if tt.first.hash() != tt.root.first().hash() {
- t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first())
- }
- if tt.last.hash() != tt.root.last().hash() {
- t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last())
- }
- })
- }
-}
-
-func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode {
- n := newSymbolNode(v)
- n.pos = pos
- return n
-}
-
-func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode {
- n := newEndMarkerNode(spec.LexModeKindID(id))
- n.pos = pos
- return n
-}
-
-func TestFollowAndSymbolTable(t *testing.T) {
- symPos := func(n uint16) symbolPosition {
- pos, err := newSymbolPosition(n, false)
- if err != nil {
- panic(err)
- }
- return pos
- }
-
- endPos := func(n uint16) symbolPosition {
- pos, err := newSymbolPosition(n, true)
- if err != nil {
- panic(err)
- }
- return pos
- }
-
- p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
- cpt, err := p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
- spec.LexModeKindIDMin: cpt,
- })
- if err != nil {
- t.Fatal(err)
- }
-
- {
- followTab := genFollowTable(bt)
- if followTab == nil {
- t.Fatal("follow table is nil")
- }
- expectedFollowTab := followTable{
- 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
- 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
- 3: newSymbolPositionSet().add(symPos(4)),
- 4: newSymbolPositionSet().add(symPos(5)),
- 5: newSymbolPositionSet().add(endPos(6)),
- }
- testFollowTable(t, expectedFollowTab, followTab)
- }
-
- {
- entry := func(v byte) byteRange {
- return byteRange{
- from: v,
- to: v,
- }
- }
-
- expectedSymTab := &symbolTable{
- symPos2Byte: map[symbolPosition]byteRange{
- symPos(1): entry(byte('a')),
- symPos(2): entry(byte('b')),
- symPos(3): entry(byte('a')),
- symPos(4): entry(byte('b')),
- symPos(5): entry(byte('b')),
- },
- endPos2ID: map[symbolPosition]spec.LexModeKindID{
- endPos(6): 1,
- },
- }
- testSymbolTable(t, expectedSymTab, symTab)
- }
-}
-
-func testFollowTable(t *testing.T, expected, actual followTable) {
- if len(actual) != len(expected) {
- t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual))
- }
- for ePos, eSet := range expected {
- aSet, ok := actual[ePos]
- if !ok {
- t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet)
- }
- if aSet.hash() != eSet.hash() {
- t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet)
- }
- }
-}
-
-func testSymbolTable(t *testing.T, expected, actual *symbolTable) {
- t.Helper()
-
- if len(actual.symPos2Byte) != len(expected.symPos2Byte) {
- t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte))
- }
- for ePos, eByte := range expected.symPos2Byte {
- byte, ok := actual.symPos2Byte[ePos]
- if !ok {
- t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte)
- continue
- }
- if byte.from != eByte.from || byte.to != eByte.to {
- t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte)
- }
- }
-
- if len(actual.endPos2ID) != len(expected.endPos2ID) {
- t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID))
- }
- for ePos, eID := range expected.endPos2ID {
- id, ok := actual.endPos2ID[ePos]
- if !ok {
- t.Errorf("an end position entry is not found: %v -> %v", ePos, eID)
- continue
- }
- if id != eID {
- t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id)
- }
- }
-}
diff --git a/grammar/lexical/entry.go b/grammar/lexical/entry.go
deleted file mode 100644
index 6d2fbe3..0000000
--- a/grammar/lexical/entry.go
+++ /dev/null
@@ -1,171 +0,0 @@
-package lexical
-
-import (
- "fmt"
- "sort"
- "strings"
-
- spec "spec/grammar"
-)
-
-type LexEntry struct {
- Kind spec.LexKindName
- Pattern string
- Modes []spec.LexModeName
- Push spec.LexModeName
- Pop bool
- Fragment bool
-}
-
-type LexSpec struct {
- Entries []*LexEntry
-}
-
-func (s *LexSpec) Validate() error {
- if len(s.Entries) <= 0 {
- return fmt.Errorf("the lexical specification must have at least one entry")
- }
- {
- ks := map[string]struct{}{}
- fks := map[string]struct{}{}
- for _, e := range s.Entries {
- // Allow duplicate names between fragments and non-fragments.
- if e.Fragment {
- if _, exist := fks[e.Kind.String()]; exist {
- return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
- }
- fks[e.Kind.String()] = struct{}{}
- } else {
- if _, exist := ks[e.Kind.String()]; exist {
- return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
- }
- ks[e.Kind.String()] = struct{}{}
- }
- }
- }
- {
- kinds := []string{}
- modes := []string{
- spec.LexModeNameDefault.String(), // This is a predefined mode.
- }
- for _, e := range s.Entries {
- if e.Fragment {
- continue
- }
-
- kinds = append(kinds, e.Kind.String())
-
- for _, m := range e.Modes {
- modes = append(modes, m.String())
- }
- }
-
- kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
- modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
- if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(spec.LexModeNameDefault.String()) {
- var b strings.Builder
- fmt.Fprintf(&b, "%+v", ids[0])
- for _, id := range ids[1:] {
- fmt.Fprintf(&b, ", %+v", id)
- }
- return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", spec.LexModeNameDefault, b.String())
- }
- return nil
- })
- errs := append(kindErrs, modeErrs...)
- if len(errs) > 0 {
- var b strings.Builder
- fmt.Fprintf(&b, "%v", errs[0])
- for _, err := range errs[1:] {
- fmt.Fprintf(&b, "\n%v", err)
- }
- return fmt.Errorf(b.String())
- }
- }
-
- return nil
-}
-
-func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
- duplicated := FindSpellingInconsistencies(ids)
- if len(duplicated) == 0 {
- return nil
- }
-
- var errs []error
- for _, dup := range duplicated {
- if hook != nil {
- err := hook(dup)
- if err != nil {
- errs = append(errs, err)
- continue
- }
- }
-
- var b strings.Builder
- fmt.Fprintf(&b, "%+v", dup[0])
- for _, id := range dup[1:] {
- fmt.Fprintf(&b, ", %+v", id)
- }
- err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
- errs = append(errs, err)
- }
-
- return errs
-}
-
-// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
-// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
-// in UpperCamelCase. Thus they are considere to be spelling inconsistency.
-func FindSpellingInconsistencies(ids []string) [][]string {
- m := map[string][]string{}
- for _, id := range removeDuplicates(ids) {
- c := SnakeCaseToUpperCamelCase(id)
- m[c] = append(m[c], id)
- }
-
- var duplicated [][]string
- for _, camels := range m {
- if len(camels) == 1 {
- continue
- }
- duplicated = append(duplicated, camels)
- }
-
- for _, dup := range duplicated {
- sort.Slice(dup, func(i, j int) bool {
- return dup[i] < dup[j]
- })
- }
- sort.Slice(duplicated, func(i, j int) bool {
- return duplicated[i][0] < duplicated[j][0]
- })
-
- return duplicated
-}
-
-func removeDuplicates(s []string) []string {
- m := map[string]struct{}{}
- for _, v := range s {
- m[v] = struct{}{}
- }
-
- var unique []string
- for v := range m {
- unique = append(unique, v)
- }
-
- return unique
-}
-
-func SnakeCaseToUpperCamelCase(snake string) string {
- elems := strings.Split(snake, "_")
- for i, e := range elems {
- if len(e) == 0 {
- continue
- }
- elems[i] = strings.ToUpper(string(e[0])) + e[1:]
- }
-
- return strings.Join(elems, "")
-}
diff --git a/grammar/lexical/parser/error.go b/grammar/lexical/parser/error.go
deleted file mode 100644
index be81da4..0000000
--- a/grammar/lexical/parser/error.go
+++ /dev/null
@@ -1,36 +0,0 @@
-package parser
-
-import "fmt"
-
-var (
- ParseErr = fmt.Errorf("parse error")
-
- // lexical errors
- synErrIncompletedEscSeq = fmt.Errorf("incompleted escape sequence; unexpected EOF following \\")
- synErrInvalidEscSeq = fmt.Errorf("invalid escape sequence")
- synErrInvalidCodePoint = fmt.Errorf("code points must consist of just 4 or 6 hex digits")
- synErrCharPropInvalidSymbol = fmt.Errorf("invalid character property symbol")
- SynErrFragmentInvalidSymbol = fmt.Errorf("invalid fragment symbol")
-
- // syntax errors
- synErrUnexpectedToken = fmt.Errorf("unexpected token")
- synErrNullPattern = fmt.Errorf("a pattern must be a non-empty byte sequence")
- synErrUnmatchablePattern = fmt.Errorf("a pattern cannot match any characters")
- synErrAltLackOfOperand = fmt.Errorf("an alternation expression must have operands")
- synErrRepNoTarget = fmt.Errorf("a repeat expression must have an operand")
- synErrGroupNoElem = fmt.Errorf("a grouping expression must include at least one character")
- synErrGroupUnclosed = fmt.Errorf("unclosed grouping expression")
- synErrGroupNoInitiator = fmt.Errorf(") needs preceding (")
- synErrGroupInvalidForm = fmt.Errorf("invalid grouping expression")
- synErrBExpNoElem = fmt.Errorf("a bracket expression must include at least one character")
- synErrBExpUnclosed = fmt.Errorf("unclosed bracket expression")
- synErrBExpInvalidForm = fmt.Errorf("invalid bracket expression")
- synErrRangeInvalidOrder = fmt.Errorf("a range expression with invalid order")
- synErrRangePropIsUnavailable = fmt.Errorf("a property expression is unavailable in a range expression")
- synErrRangeInvalidForm = fmt.Errorf("invalid range expression")
- synErrCPExpInvalidForm = fmt.Errorf("invalid code point expression")
- synErrCPExpOutOfRange = fmt.Errorf("a code point must be between U+0000 to U+10FFFF")
- synErrCharPropExpInvalidForm = fmt.Errorf("invalid character property expression")
- synErrCharPropUnsupported = fmt.Errorf("unsupported character property")
- synErrFragmentExpInvalidForm = fmt.Errorf("invalid fragment expression")
-)
diff --git a/grammar/lexical/parser/fragment.go b/grammar/lexical/parser/fragment.go
deleted file mode 100644
index e51759e..0000000
--- a/grammar/lexical/parser/fragment.go
+++ /dev/null
@@ -1,72 +0,0 @@
-package parser
-
-import (
- "fmt"
-
- spec "spec/grammar"
-)
-
-type incompleteFragment struct {
- kind spec.LexKindName
- root *rootNode
-}
-
-func CompleteFragments(fragments map[spec.LexKindName]CPTree) error {
- if len(fragments) == 0 {
- return nil
- }
-
- completeFragments := map[spec.LexKindName]CPTree{}
- incompleteFragments := []*incompleteFragment{}
- for kind, tree := range fragments {
- root, ok := tree.(*rootNode)
- if !ok {
- return fmt.Errorf("CompleteFragments can take only *rootNode: %T", tree)
- }
- if root.incomplete() {
- incompleteFragments = append(incompleteFragments, &incompleteFragment{
- kind: kind,
- root: root,
- })
- } else {
- completeFragments[kind] = root
- }
- }
- for len(incompleteFragments) > 0 {
- lastIncompCount := len(incompleteFragments)
- remainingFragments := []*incompleteFragment{}
- for _, e := range incompleteFragments {
- complete, err := ApplyFragments(e.root, completeFragments)
- if err != nil {
- return err
- }
- if !complete {
- remainingFragments = append(remainingFragments, e)
- } else {
- completeFragments[e.kind] = e.root
- }
- }
- incompleteFragments = remainingFragments
- if len(incompleteFragments) == lastIncompCount {
- return ParseErr
- }
- }
-
- return nil
-}
-
-func ApplyFragments(t CPTree, fragments map[spec.LexKindName]CPTree) (bool, error) {
- root, ok := t.(*rootNode)
- if !ok {
- return false, fmt.Errorf("ApplyFragments can take only *rootNode type: %T", t)
- }
-
- for name, frag := range fragments {
- err := root.applyFragment(name, frag)
- if err != nil {
- return false, err
- }
- }
-
- return !root.incomplete(), nil
-}
diff --git a/grammar/lexical/parser/lexer.go b/grammar/lexical/parser/lexer.go
deleted file mode 100644
index 3861825..0000000
--- a/grammar/lexical/parser/lexer.go
+++ /dev/null
@@ -1,594 +0,0 @@
-package parser
-
-import (
- "bufio"
- "fmt"
- "io"
- "strings"
-)
-
-type tokenKind string
-
-const (
- tokenKindChar tokenKind = "char"
- tokenKindAnyChar tokenKind = "."
- tokenKindRepeat tokenKind = "*"
- tokenKindRepeatOneOrMore tokenKind = "+"
- tokenKindOption tokenKind = "?"
- tokenKindAlt tokenKind = "|"
- tokenKindGroupOpen tokenKind = "("
- tokenKindGroupClose tokenKind = ")"
- tokenKindBExpOpen tokenKind = "["
- tokenKindInverseBExpOpen tokenKind = "[^"
- tokenKindBExpClose tokenKind = "]"
- tokenKindCharRange tokenKind = "-"
- tokenKindCodePointLeader tokenKind = "\\u"
- tokenKindCharPropLeader tokenKind = "\\p"
- tokenKindFragmentLeader tokenKind = "\\f"
- tokenKindLBrace tokenKind = "{"
- tokenKindRBrace tokenKind = "}"
- tokenKindEqual tokenKind = "="
- tokenKindCodePoint tokenKind = "code point"
- tokenKindCharPropSymbol tokenKind = "character property symbol"
- tokenKindFragmentSymbol tokenKind = "fragment symbol"
- tokenKindEOF tokenKind = "eof"
-)
-
-type token struct {
- kind tokenKind
- char rune
- propSymbol string
- codePoint string
- fragmentSymbol string
-}
-
-const nullChar = '\u0000'
-
-func newToken(kind tokenKind, char rune) *token {
- return &token{
- kind: kind,
- char: char,
- }
-}
-
-func newCodePointToken(codePoint string) *token {
- return &token{
- kind: tokenKindCodePoint,
- codePoint: codePoint,
- }
-}
-
-func newCharPropSymbolToken(propSymbol string) *token {
- return &token{
- kind: tokenKindCharPropSymbol,
- propSymbol: propSymbol,
- }
-}
-
-func newFragmentSymbolToken(fragmentSymbol string) *token {
- return &token{
- kind: tokenKindFragmentSymbol,
- fragmentSymbol: fragmentSymbol,
- }
-}
-
-type lexerMode string
-
-const (
- lexerModeDefault lexerMode = "default"
- lexerModeBExp lexerMode = "bracket expression"
- lexerModeCPExp lexerMode = "code point expression"
- lexerModeCharPropExp lexerMode = "character property expression"
- lexerModeFragmentExp lexerMode = "fragment expression"
-)
-
-type lexerModeStack struct {
- stack []lexerMode
-}
-
-func newLexerModeStack() *lexerModeStack {
- return &lexerModeStack{
- stack: []lexerMode{
- lexerModeDefault,
- },
- }
-}
-
-func (s *lexerModeStack) top() lexerMode {
- return s.stack[len(s.stack)-1]
-}
-
-func (s *lexerModeStack) push(m lexerMode) {
- s.stack = append(s.stack, m)
-}
-
-func (s *lexerModeStack) pop() {
- s.stack = s.stack[:len(s.stack)-1]
-}
-
-type rangeState string
-
-// [a-z]
-// ^^^^
-// |||`-- ready
-// ||`-- expect range terminator
-// |`-- read range initiator
-// `-- ready
-const (
- rangeStateReady rangeState = "ready"
- rangeStateReadRangeInitiator rangeState = "read range initiator"
- rangeStateExpectRangeTerminator rangeState = "expect range terminator"
-)
-
-type lexer struct {
- src *bufio.Reader
- peekChar2 rune
- peekEOF2 bool
- peekChar1 rune
- peekEOF1 bool
- lastChar rune
- reachedEOF bool
- prevChar1 rune
- prevEOF1 bool
- prevChar2 rune
- pervEOF2 bool
- modeStack *lexerModeStack
- rangeState rangeState
-
- errCause error
- errDetail string
-}
-
-func newLexer(src io.Reader) *lexer {
- return &lexer{
- src: bufio.NewReader(src),
- peekChar2: nullChar,
- peekEOF2: false,
- peekChar1: nullChar,
- peekEOF1: false,
- lastChar: nullChar,
- reachedEOF: false,
- prevChar1: nullChar,
- prevEOF1: false,
- prevChar2: nullChar,
- pervEOF2: false,
- modeStack: newLexerModeStack(),
- rangeState: rangeStateReady,
- }
-}
-
-func (l *lexer) error() (string, error) {
- return l.errDetail, l.errCause
-}
-
-func (l *lexer) next() (*token, error) {
- c, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- return newToken(tokenKindEOF, nullChar), nil
- }
-
- switch l.modeStack.top() {
- case lexerModeBExp:
- tok, err := l.nextInBExp(c)
- if err != nil {
- return nil, err
- }
- if tok.kind == tokenKindChar || tok.kind == tokenKindCodePointLeader || tok.kind == tokenKindCharPropLeader {
- switch l.rangeState {
- case rangeStateReady:
- l.rangeState = rangeStateReadRangeInitiator
- case rangeStateExpectRangeTerminator:
- l.rangeState = rangeStateReady
- }
- }
- switch tok.kind {
- case tokenKindBExpClose:
- l.modeStack.pop()
- case tokenKindCharRange:
- l.rangeState = rangeStateExpectRangeTerminator
- case tokenKindCodePointLeader:
- l.modeStack.push(lexerModeCPExp)
- case tokenKindCharPropLeader:
- l.modeStack.push(lexerModeCharPropExp)
- }
- return tok, nil
- case lexerModeCPExp:
- tok, err := l.nextInCodePoint(c)
- if err != nil {
- return nil, err
- }
- switch tok.kind {
- case tokenKindRBrace:
- l.modeStack.pop()
- }
- return tok, nil
- case lexerModeCharPropExp:
- tok, err := l.nextInCharProp(c)
- if err != nil {
- return nil, err
- }
- switch tok.kind {
- case tokenKindRBrace:
- l.modeStack.pop()
- }
- return tok, nil
- case lexerModeFragmentExp:
- tok, err := l.nextInFragment(c)
- if err != nil {
- return nil, err
- }
- switch tok.kind {
- case tokenKindRBrace:
- l.modeStack.pop()
- }
- return tok, nil
- default:
- tok, err := l.nextInDefault(c)
- if err != nil {
- return nil, err
- }
- switch tok.kind {
- case tokenKindBExpOpen:
- l.modeStack.push(lexerModeBExp)
- l.rangeState = rangeStateReady
- case tokenKindInverseBExpOpen:
- l.modeStack.push(lexerModeBExp)
- l.rangeState = rangeStateReady
- case tokenKindCodePointLeader:
- l.modeStack.push(lexerModeCPExp)
- case tokenKindCharPropLeader:
- l.modeStack.push(lexerModeCharPropExp)
- case tokenKindFragmentLeader:
- l.modeStack.push(lexerModeFragmentExp)
- }
- return tok, nil
- }
-}
-
-func (l *lexer) nextInDefault(c rune) (*token, error) {
- switch c {
- case '*':
- return newToken(tokenKindRepeat, nullChar), nil
- case '+':
- return newToken(tokenKindRepeatOneOrMore, nullChar), nil
- case '?':
- return newToken(tokenKindOption, nullChar), nil
- case '.':
- return newToken(tokenKindAnyChar, nullChar), nil
- case '|':
- return newToken(tokenKindAlt, nullChar), nil
- case '(':
- return newToken(tokenKindGroupOpen, nullChar), nil
- case ')':
- return newToken(tokenKindGroupClose, nullChar), nil
- case '[':
- c1, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindBExpOpen, nullChar), nil
- }
- if c1 != '^' {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindBExpOpen, nullChar), nil
- }
- c2, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindInverseBExpOpen, nullChar), nil
- }
- if c2 != ']' {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindInverseBExpOpen, nullChar), nil
- }
- err = l.restore()
- if err != nil {
- return nil, err
- }
- err = l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindBExpOpen, nullChar), nil
- case '\\':
- c, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- l.errCause = synErrIncompletedEscSeq
- return nil, ParseErr
- }
- if c == 'u' {
- return newToken(tokenKindCodePointLeader, nullChar), nil
- }
- if c == 'p' {
- return newToken(tokenKindCharPropLeader, nullChar), nil
- }
- if c == 'f' {
- return newToken(tokenKindFragmentLeader, nullChar), nil
- }
- if c == '\\' || c == '.' || c == '*' || c == '+' || c == '?' || c == '|' || c == '(' || c == ')' || c == '[' || c == ']' {
- return newToken(tokenKindChar, c), nil
- }
- l.errCause = synErrInvalidEscSeq
- l.errDetail = fmt.Sprintf("\\%v is not supported", string(c))
- return nil, ParseErr
- default:
- return newToken(tokenKindChar, c), nil
- }
-}
-
-func (l *lexer) nextInBExp(c rune) (*token, error) {
- switch c {
- case '-':
- if l.rangeState != rangeStateReadRangeInitiator {
- return newToken(tokenKindChar, c), nil
- }
- c1, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindChar, c), nil
- }
- if c1 != ']' {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindCharRange, nullChar), nil
- }
- err = l.restore()
- if err != nil {
- return nil, err
- }
- return newToken(tokenKindChar, c), nil
- case ']':
- return newToken(tokenKindBExpClose, nullChar), nil
- case '\\':
- c, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- l.errCause = synErrIncompletedEscSeq
- return nil, ParseErr
- }
- if c == 'u' {
- return newToken(tokenKindCodePointLeader, nullChar), nil
- }
- if c == 'p' {
- return newToken(tokenKindCharPropLeader, nullChar), nil
- }
- if c == '\\' || c == '^' || c == '-' || c == ']' {
- return newToken(tokenKindChar, c), nil
- }
- l.errCause = synErrInvalidEscSeq
- l.errDetail = fmt.Sprintf("\\%v is not supported in a bracket expression", string(c))
- return nil, ParseErr
- default:
- return newToken(tokenKindChar, c), nil
- }
-}
-
-func (l *lexer) nextInCodePoint(c rune) (*token, error) {
- switch c {
- case '{':
- return newToken(tokenKindLBrace, nullChar), nil
- case '}':
- return newToken(tokenKindRBrace, nullChar), nil
- default:
- if !isHexDigit(c) {
- l.errCause = synErrInvalidCodePoint
- return nil, ParseErr
- }
- var b strings.Builder
- fmt.Fprint(&b, string(c))
- n := 1
- for {
- c, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- break
- }
- if c == '}' {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- break
- }
- if !isHexDigit(c) || n >= 6 {
- l.errCause = synErrInvalidCodePoint
- return nil, ParseErr
- }
- fmt.Fprint(&b, string(c))
- n++
- }
- cp := b.String()
- cpLen := len(cp)
- if !(cpLen == 4 || cpLen == 6) {
- l.errCause = synErrInvalidCodePoint
- return nil, ParseErr
- }
- return newCodePointToken(b.String()), nil
- }
-}
-
-func isHexDigit(c rune) bool {
- if c >= '0' && c <= '9' || c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z' {
- return true
- }
- return false
-}
-
-func (l *lexer) nextInCharProp(c rune) (*token, error) {
- switch c {
- case '{':
- return newToken(tokenKindLBrace, nullChar), nil
- case '}':
- return newToken(tokenKindRBrace, nullChar), nil
- case '=':
- return newToken(tokenKindEqual, nullChar), nil
- default:
- var b strings.Builder
- fmt.Fprint(&b, string(c))
- n := 1
- for {
- c, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- break
- }
- if c == '}' || c == '=' {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- break
- }
- fmt.Fprint(&b, string(c))
- n++
- }
- sym := strings.TrimSpace(b.String())
- if len(sym) == 0 {
- l.errCause = synErrCharPropInvalidSymbol
- return nil, ParseErr
- }
- return newCharPropSymbolToken(sym), nil
- }
-}
-
-func (l *lexer) nextInFragment(c rune) (*token, error) {
- switch c {
- case '{':
- return newToken(tokenKindLBrace, nullChar), nil
- case '}':
- return newToken(tokenKindRBrace, nullChar), nil
- default:
- var b strings.Builder
- fmt.Fprint(&b, string(c))
- n := 1
- for {
- c, eof, err := l.read()
- if err != nil {
- return nil, err
- }
- if eof {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- break
- }
- if c == '}' {
- err := l.restore()
- if err != nil {
- return nil, err
- }
- break
- }
- fmt.Fprint(&b, string(c))
- n++
- }
- sym := strings.TrimSpace(b.String())
- if len(sym) == 0 {
- l.errCause = SynErrFragmentInvalidSymbol
- return nil, ParseErr
- }
- return newFragmentSymbolToken(sym), nil
- }
-}
-
-func (l *lexer) read() (rune, bool, error) {
- if l.reachedEOF {
- return l.lastChar, l.reachedEOF, nil
- }
- if l.peekChar1 != nullChar || l.peekEOF1 {
- l.prevChar2 = l.prevChar1
- l.pervEOF2 = l.prevEOF1
- l.prevChar1 = l.lastChar
- l.prevEOF1 = l.reachedEOF
- l.lastChar = l.peekChar1
- l.reachedEOF = l.peekEOF1
- l.peekChar1 = l.peekChar2
- l.peekEOF1 = l.peekEOF2
- l.peekChar2 = nullChar
- l.peekEOF2 = false
- return l.lastChar, l.reachedEOF, nil
- }
- c, _, err := l.src.ReadRune()
- if err != nil {
- if err == io.EOF {
- l.prevChar2 = l.prevChar1
- l.pervEOF2 = l.prevEOF1
- l.prevChar1 = l.lastChar
- l.prevEOF1 = l.reachedEOF
- l.lastChar = nullChar
- l.reachedEOF = true
- return l.lastChar, l.reachedEOF, nil
- }
- return nullChar, false, err
- }
- l.prevChar2 = l.prevChar1
- l.pervEOF2 = l.prevEOF1
- l.prevChar1 = l.lastChar
- l.prevEOF1 = l.reachedEOF
- l.lastChar = c
- l.reachedEOF = false
- return l.lastChar, l.reachedEOF, nil
-}
-
-func (l *lexer) restore() error {
- if l.lastChar == nullChar && !l.reachedEOF {
- return fmt.Errorf("failed to call restore() because the last character is null")
- }
- l.peekChar2 = l.peekChar1
- l.peekEOF2 = l.peekEOF1
- l.peekChar1 = l.lastChar
- l.peekEOF1 = l.reachedEOF
- l.lastChar = l.prevChar1
- l.reachedEOF = l.prevEOF1
- l.prevChar1 = l.prevChar2
- l.prevEOF1 = l.pervEOF2
- l.prevChar2 = nullChar
- l.pervEOF2 = false
- return nil
-}
diff --git a/grammar/lexical/parser/lexer_test.go b/grammar/lexical/parser/lexer_test.go
deleted file mode 100644
index 055466e..0000000
--- a/grammar/lexical/parser/lexer_test.go
+++ /dev/null
@@ -1,524 +0,0 @@
-package parser
-
-import (
- "strings"
- "testing"
-)
-
-func TestLexer(t *testing.T) {
- tests := []struct {
- caption string
- src string
- tokens []*token
- err error
- }{
- {
- caption: "lexer can recognize ordinaly characters",
- src: "123abcいろは",
- tokens: []*token{
- newToken(tokenKindChar, '1'),
- newToken(tokenKindChar, '2'),
- newToken(tokenKindChar, '3'),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, 'b'),
- newToken(tokenKindChar, 'c'),
- newToken(tokenKindChar, 'い'),
- newToken(tokenKindChar, 'ろ'),
- newToken(tokenKindChar, 'は'),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the special characters in default mode",
- src: ".*+?|()[\\u",
- tokens: []*token{
- newToken(tokenKindAnyChar, nullChar),
- newToken(tokenKindRepeat, nullChar),
- newToken(tokenKindRepeatOneOrMore, nullChar),
- newToken(tokenKindOption, nullChar),
- newToken(tokenKindAlt, nullChar),
- newToken(tokenKindGroupOpen, nullChar),
- newToken(tokenKindGroupClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the escape sequences in default mode",
- src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[",
- tokens: []*token{
- newToken(tokenKindChar, '\\'),
- newToken(tokenKindChar, '.'),
- newToken(tokenKindChar, '*'),
- newToken(tokenKindChar, '+'),
- newToken(tokenKindChar, '?'),
- newToken(tokenKindChar, '|'),
- newToken(tokenKindChar, '('),
- newToken(tokenKindChar, ')'),
- newToken(tokenKindChar, '['),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "], {, and } are treated as an ordinary character in default mode",
- src: "]{}",
- tokens: []*token{
- newToken(tokenKindChar, ']'),
- newToken(tokenKindChar, '{'),
- newToken(tokenKindChar, '}'),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the special characters in bracket expression mode",
- src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("09AF"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("09abcf"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the escape sequences in bracket expression mode",
- src: "[\\^a\\-z]",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '^'),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "in a bracket expression, the special characters are also handled as normal characters",
- src: "[\\\\.*+?|()[",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '\\'),
- newToken(tokenKindChar, '.'),
- newToken(tokenKindChar, '*'),
- newToken(tokenKindChar, '+'),
- newToken(tokenKindChar, '?'),
- newToken(tokenKindChar, '|'),
- newToken(tokenKindChar, '('),
- newToken(tokenKindChar, ')'),
- newToken(tokenKindChar, '['),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters",
- // [...-...][...-][-...][-]
- // ~~~~~~~ ~ ~ ~
- // ^ ^ ^ ^
- // | | | `-- Ordinary Character (b)
- // | | `-- Ordinary Character (b)
- // | `-- Ordinary Character (b)
- // `-- Character Range (a)
- //
- // a. *-* is handled as a character-range expression.
- // b. *-, -*, or - are handled as ordinary characters.
- src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, 'a'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, 'z'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindCharRange, nullChar),
- newToken(tokenKindChar, '-'),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters",
- // [^...^...][^]
- // ~~ ~ ~~
- // ^ ^ ^^
- // | | |`-- Ordinary Character (c)
- // | | `-- Bracket Expression
- // | `-- Ordinary Character (b)
- // `-- Inverse Bracket Expression (a)
- //
- // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions.
- // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols.
- // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character.
- src: "[^^][^]",
- tokens: []*token{
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindChar, '^'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindChar, '^'),
- newToken(tokenKindBExpClose, nullChar),
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer raises an error when an invalid escape sequence appears",
- src: "\\@",
- err: synErrInvalidEscSeq,
- },
- {
- caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
- src: "\\",
- err: synErrIncompletedEscSeq,
- },
- {
- caption: "lexer raises an error when an invalid escape sequence appears",
- src: "[\\@",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- },
- err: synErrInvalidEscSeq,
- },
- {
- caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears",
- src: "[\\",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- },
- err: synErrIncompletedEscSeq,
- },
- {
- caption: "lexer can recognize the special characters and code points in code point expression mode",
- src: "\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("4567"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("89abcd"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("efAB"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("CDEF01"),
- newToken(tokenKindRBrace, nullChar),
-
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("4567"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("89abcd"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("efAB"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("CDEF01"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("4567"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("89abcd"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("efAB"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("CDEF01"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "a one digit hex string isn't a valid code point",
- src: "\\u{0",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a two digits hex string isn't a valid code point",
- src: "\\u{01",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a three digits hex string isn't a valid code point",
- src: "\\u{012",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a four digits hex string is a valid code point",
- src: "\\u{0123}",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("0123"),
- newToken(tokenKindRBrace, nullChar),
- },
- },
- {
- caption: "a five digits hex string isn't a valid code point",
- src: "\\u{01234",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a six digits hex string is a valid code point",
- src: "\\u{012345}",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCodePointToken("012345"),
- newToken(tokenKindRBrace, nullChar),
- },
- },
- {
- caption: "a seven digits hex string isn't a valid code point",
- src: "\\u{0123456",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a code point must be hex digits",
- src: "\\u{g",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "a code point must be hex digits",
- src: "\\u{G",
- tokens: []*token{
- newToken(tokenKindCodePointLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- },
- err: synErrInvalidCodePoint,
- },
- {
- caption: "lexer can recognize the special characters and symbols in character property expression mode",
- src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]",
- tokens: []*token{
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("General_Category"),
- newToken(tokenKindEqual, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
-
- newToken(tokenKindBExpOpen, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("General_Category"),
- newToken(tokenKindEqual, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindInverseBExpOpen, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindCharPropLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newCharPropSymbolToken("General_Category"),
- newToken(tokenKindEqual, nullChar),
- newCharPropSymbolToken("Letter"),
- newToken(tokenKindRBrace, nullChar),
- newToken(tokenKindBExpClose, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "lexer can recognize the special characters and symbols in fragment expression mode",
- src: "\\f{integer}",
- tokens: []*token{
- newToken(tokenKindFragmentLeader, nullChar),
- newToken(tokenKindLBrace, nullChar),
- newFragmentSymbolToken("integer"),
- newToken(tokenKindRBrace, nullChar),
-
- newToken(tokenKindEOF, nullChar),
- },
- },
- {
- caption: "a fragment expression is not supported in a bracket expression",
- src: "[\\f",
- tokens: []*token{
- newToken(tokenKindBExpOpen, nullChar),
- },
- err: synErrInvalidEscSeq,
- },
- {
- caption: "a fragment expression is not supported in an inverse bracket expression",
- src: "[^\\f",
- tokens: []*token{
- newToken(tokenKindInverseBExpOpen, nullChar),
- },
- err: synErrInvalidEscSeq,
- },
- }
- for _, tt := range tests {
- t.Run(tt.caption, func(t *testing.T) {
- lex := newLexer(strings.NewReader(tt.src))
- var err error
- var tok *token
- i := 0
- for {
- tok, err = lex.next()
- if err != nil {
- break
- }
- if i >= len(tt.tokens) {
- break
- }
- eTok := tt.tokens[i]
- i++
- testToken(t, tok, eTok)
-
- if tok.kind == tokenKindEOF {
- break
- }
- }
- if tt.err != nil {
- if err != ParseErr {
- t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
- }
- detail, cause := lex.error()
- if cause != tt.err {
- t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail)
- }
- } else {
- if err != nil {
- t.Fatalf("unexpected error: %v", err)
- }
- }
- if i < len(tt.tokens) {
- t.Fatalf("expecte more tokens")
- }
- })
- }
-}
-
-func testToken(t *testing.T, a, e *token) {
- t.Helper()
- if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint {
- t.Fatalf("unexpected token: want: %+v, got: %+v", e, a)
- }
-}
diff --git a/grammar/lexical/parser/parser.go b/grammar/lexical/parser/parser.go
deleted file mode 100644
index d1a08a2..0000000
--- a/grammar/lexical/parser/parser.go
+++ /dev/null
@@ -1,531 +0,0 @@
-package parser
-
-import (
- "bytes"
- "fmt"
- "io"
- "strconv"
-
- spec "spec/grammar"
- "ucd"
-)
-
-type PatternEntry struct {
- ID spec.LexModeKindID
- Pattern []byte
-}
-
-type parser struct {
- kind spec.LexKindName
- lex *lexer
- peekedTok *token
- lastTok *token
-
- // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that
- // appear in property expressions.
- //
- // The contributory properties are not exposed, and users cannot use those properties because the parser
- // follows [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid.
- //
- // isContributoryPropertyExposed is set to true when the parser is generated recursively. The parser needs to
- // interpret derived properties internally because the derived properties consist of other properties that
- // may contain the contributory properties.
- //
- // [UAX #44 5.13 Property APIs] says:
- // > The following subtypes of Unicode character properties should generally not be exposed in APIs,
- // > except in limited circumstances. They may not be useful, particularly in public API collections,
- // > and may instead prove misleading to the users of such API collections.
- // > * Contributory properties are not recommended for public APIs.
- // > ...
- // https://unicode.org/reports/tr44/#Property_APIs
- isContributoryPropertyExposed bool
-
- errCause error
- errDetail string
-}
-
-func NewParser(kind spec.LexKindName, src io.Reader) *parser {
- return &parser{
- kind: kind,
- lex: newLexer(src),
- isContributoryPropertyExposed: false,
- }
-}
-
-func (p *parser) exposeContributoryProperty() {
- p.isContributoryPropertyExposed = true
-}
-
-func (p *parser) Error() (string, error) {
- return p.errDetail, p.errCause
-}
-
-func (p *parser) Parse() (root CPTree, retErr error) {
- defer func() {
- err := recover()
- if err != nil {
- var ok bool
- retErr, ok = err.(error)
- if !ok {
- panic(err)
- }
- return
- }
- }()
-
- return newRootNode(p.kind, p.parseRegexp()), nil
-}
-
-func (p *parser) parseRegexp() CPTree {
- alt := p.parseAlt()
- if alt == nil {
- if p.consume(tokenKindGroupClose) {
- p.raiseParseError(synErrGroupNoInitiator, "")
- }
- p.raiseParseError(synErrNullPattern, "")
- }
- if p.consume(tokenKindGroupClose) {
- p.raiseParseError(synErrGroupNoInitiator, "")
- }
- p.expect(tokenKindEOF)
- return alt
-}
-
-func (p *parser) parseAlt() CPTree {
- left := p.parseConcat()
- if left == nil {
- if p.consume(tokenKindAlt) {
- p.raiseParseError(synErrAltLackOfOperand, "")
- }
- return nil
- }
- for {
- if !p.consume(tokenKindAlt) {
- break
- }
- right := p.parseConcat()
- if right == nil {
- p.raiseParseError(synErrAltLackOfOperand, "")
- }
- left = newAltNode(left, right)
- }
- return left
-}
-
-func (p *parser) parseConcat() CPTree {
- left := p.parseRepeat()
- for {
- right := p.parseRepeat()
- if right == nil {
- break
- }
- left = newConcatNode(left, right)
- }
- return left
-}
-
-func (p *parser) parseRepeat() CPTree {
- group := p.parseGroup()
- if group == nil {
- if p.consume(tokenKindRepeat) {
- p.raiseParseError(synErrRepNoTarget, "* needs an operand")
- }
- if p.consume(tokenKindRepeatOneOrMore) {
- p.raiseParseError(synErrRepNoTarget, "+ needs an operand")
- }
- if p.consume(tokenKindOption) {
- p.raiseParseError(synErrRepNoTarget, "? needs an operand")
- }
- return nil
- }
- if p.consume(tokenKindRepeat) {
- return newRepeatNode(group)
- }
- if p.consume(tokenKindRepeatOneOrMore) {
- return newRepeatOneOrMoreNode(group)
- }
- if p.consume(tokenKindOption) {
- return newOptionNode(group)
- }
- return group
-}
-
-func (p *parser) parseGroup() CPTree {
- if p.consume(tokenKindGroupOpen) {
- alt := p.parseAlt()
- if alt == nil {
- if p.consume(tokenKindEOF) {
- p.raiseParseError(synErrGroupUnclosed, "")
- }
- p.raiseParseError(synErrGroupNoElem, "")
- }
- if p.consume(tokenKindEOF) {
- p.raiseParseError(synErrGroupUnclosed, "")
- }
- if !p.consume(tokenKindGroupClose) {
- p.raiseParseError(synErrGroupInvalidForm, "")
- }
- return alt
- }
- return p.parseSingleChar()
-}
-
-func (p *parser) parseSingleChar() CPTree {
- if p.consume(tokenKindAnyChar) {
- return genAnyCharAST()
- }
- if p.consume(tokenKindBExpOpen) {
- left := p.parseBExpElem()
- if left == nil {
- if p.consume(tokenKindEOF) {
- p.raiseParseError(synErrBExpUnclosed, "")
- }
- p.raiseParseError(synErrBExpNoElem, "")
- }
- for {
- right := p.parseBExpElem()
- if right == nil {
- break
- }
- left = newAltNode(left, right)
- }
- if p.consume(tokenKindEOF) {
- p.raiseParseError(synErrBExpUnclosed, "")
- }
- p.expect(tokenKindBExpClose)
- return left
- }
- if p.consume(tokenKindInverseBExpOpen) {
- elem := p.parseBExpElem()
- if elem == nil {
- if p.consume(tokenKindEOF) {
- p.raiseParseError(synErrBExpUnclosed, "")
- }
- p.raiseParseError(synErrBExpNoElem, "")
- }
- inverse := exclude(elem, genAnyCharAST())
- if inverse == nil {
- p.raiseParseError(synErrUnmatchablePattern, "")
- }
- for {
- elem := p.parseBExpElem()
- if elem == nil {
- break
- }
- inverse = exclude(elem, inverse)
- if inverse == nil {
- p.raiseParseError(synErrUnmatchablePattern, "")
- }
- }
- if p.consume(tokenKindEOF) {
- p.raiseParseError(synErrBExpUnclosed, "")
- }
- p.expect(tokenKindBExpClose)
- return inverse
- }
- if p.consume(tokenKindCodePointLeader) {
- return p.parseCodePoint()
- }
- if p.consume(tokenKindCharPropLeader) {
- return p.parseCharProp()
- }
- if p.consume(tokenKindFragmentLeader) {
- return p.parseFragment()
- }
- c := p.parseNormalChar()
- if c == nil {
- if p.consume(tokenKindBExpClose) {
- p.raiseParseError(synErrBExpInvalidForm, "")
- }
- return nil
- }
- return c
-}
-
-func (p *parser) parseBExpElem() CPTree {
- var left CPTree
- switch {
- case p.consume(tokenKindCodePointLeader):
- left = p.parseCodePoint()
- case p.consume(tokenKindCharPropLeader):
- left = p.parseCharProp()
- if p.consume(tokenKindCharRange) {
- p.raiseParseError(synErrRangePropIsUnavailable, "")
- }
- default:
- left = p.parseNormalChar()
- }
- if left == nil {
- return nil
- }
- if !p.consume(tokenKindCharRange) {
- return left
- }
- var right CPTree
- switch {
- case p.consume(tokenKindCodePointLeader):
- right = p.parseCodePoint()
- case p.consume(tokenKindCharPropLeader):
- p.raiseParseError(synErrRangePropIsUnavailable, "")
- default:
- right = p.parseNormalChar()
- }
- if right == nil {
- p.raiseParseError(synErrRangeInvalidForm, "")
- }
- from, _, _ := left.Range()
- _, to, _ := right.Range()
- if !isValidOrder(from, to) {
- p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("%X..%X", from, to))
- }
- return newRangeSymbolNode(from, to)
-}
-
-func (p *parser) parseCodePoint() CPTree {
- if !p.consume(tokenKindLBrace) {
- p.raiseParseError(synErrCPExpInvalidForm, "")
- }
- if !p.consume(tokenKindCodePoint) {
- p.raiseParseError(synErrCPExpInvalidForm, "")
- }
-
- n, err := strconv.ParseInt(p.lastTok.codePoint, 16, 64)
- if err != nil {
- panic(fmt.Errorf("failed to decode a code point (%v) into a int: %v", p.lastTok.codePoint, err))
- }
- if n < 0x0000 || n > 0x10FFFF {
- p.raiseParseError(synErrCPExpOutOfRange, "")
- }
-
- sym := newSymbolNode(rune(n))
-
- if !p.consume(tokenKindRBrace) {
- p.raiseParseError(synErrCPExpInvalidForm, "")
- }
-
- return sym
-}
-
-func (p *parser) parseCharProp() CPTree {
- if !p.consume(tokenKindLBrace) {
- p.raiseParseError(synErrCharPropExpInvalidForm, "")
- }
- var sym1, sym2 string
- if !p.consume(tokenKindCharPropSymbol) {
- p.raiseParseError(synErrCharPropExpInvalidForm, "")
- }
- sym1 = p.lastTok.propSymbol
- if p.consume(tokenKindEqual) {
- if !p.consume(tokenKindCharPropSymbol) {
- p.raiseParseError(synErrCharPropExpInvalidForm, "")
- }
- sym2 = p.lastTok.propSymbol
- }
-
- var alt CPTree
- var propName, propVal string
- if sym2 != "" {
- propName = sym1
- propVal = sym2
- } else {
- propName = ""
- propVal = sym1
- }
- if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) {
- p.raiseParseError(synErrCharPropUnsupported, propName)
- }
- pat, err := ucd.NormalizeCharacterProperty(propName, propVal)
- if err != nil {
- p.raiseParseError(synErrCharPropUnsupported, err.Error())
- }
- if pat != "" {
- p := NewParser(p.kind, bytes.NewReader([]byte(pat)))
- p.exposeContributoryProperty()
- ast, err := p.Parse()
- if err != nil {
- panic(err)
- }
- alt = ast
- } else {
- cpRanges, inverse, err := ucd.FindCodePointRanges(propName, propVal)
- if err != nil {
- p.raiseParseError(synErrCharPropUnsupported, err.Error())
- }
- if inverse {
- r := cpRanges[0]
- alt = exclude(newRangeSymbolNode(r.From, r.To), genAnyCharAST())
- if alt == nil {
- p.raiseParseError(synErrUnmatchablePattern, "")
- }
- for _, r := range cpRanges[1:] {
- alt = exclude(newRangeSymbolNode(r.From, r.To), alt)
- if alt == nil {
- p.raiseParseError(synErrUnmatchablePattern, "")
- }
- }
- } else {
- for _, r := range cpRanges {
- alt = genAltNode(
- alt,
- newRangeSymbolNode(r.From, r.To),
- )
- }
- }
- }
-
- if !p.consume(tokenKindRBrace) {
- p.raiseParseError(synErrCharPropExpInvalidForm, "")
- }
-
- return alt
-}
-
-func (p *parser) parseFragment() CPTree {
- if !p.consume(tokenKindLBrace) {
- p.raiseParseError(synErrFragmentExpInvalidForm, "")
- }
- if !p.consume(tokenKindFragmentSymbol) {
- p.raiseParseError(synErrFragmentExpInvalidForm, "")
- }
- sym := p.lastTok.fragmentSymbol
-
- if !p.consume(tokenKindRBrace) {
- p.raiseParseError(synErrFragmentExpInvalidForm, "")
- }
-
- return newFragmentNode(spec.LexKindName(sym), nil)
-}
-
-func (p *parser) parseNormalChar() CPTree {
- if !p.consume(tokenKindChar) {
- return nil
- }
- return newSymbolNode(p.lastTok.char)
-}
-
-func exclude(symbol, base CPTree) CPTree {
- if left, right, ok := symbol.Alternatives(); ok {
- return exclude(right, exclude(left, base))
- }
-
- if left, right, ok := base.Alternatives(); ok {
- return genAltNode(
- exclude(symbol, left),
- exclude(symbol, right),
- )
- }
-
- if bFrom, bTo, ok := base.Range(); ok {
- sFrom, sTo, ok := symbol.Range()
- if !ok {
- panic(fmt.Errorf("invalid symbol tree: %T", symbol))
- }
-
- switch {
- case sFrom > bFrom && sTo < bTo:
- return genAltNode(
- newRangeSymbolNode(bFrom, sFrom-1),
- newRangeSymbolNode(sTo+1, bTo),
- )
- case sFrom <= bFrom && sTo >= bFrom && sTo < bTo:
- return newRangeSymbolNode(sTo+1, bTo)
- case sFrom > bFrom && sFrom <= bTo && sTo >= bTo:
- return newRangeSymbolNode(bFrom, sFrom-1)
- case sFrom <= bFrom && sTo >= bTo:
- return nil
- default:
- return base
- }
- }
-
- panic(fmt.Errorf("invalid base tree: %T", base))
-}
-
-func genAnyCharAST() CPTree {
- return newRangeSymbolNode(0x0, 0x10FFFF)
-}
-
-func isValidOrder(from, to rune) bool {
- return from <= to
-}
-
-func genConcatNode(cs ...CPTree) CPTree {
- nonNilNodes := []CPTree{}
- for _, c := range cs {
- if c == nil {
- continue
- }
- nonNilNodes = append(nonNilNodes, c)
- }
- if len(nonNilNodes) <= 0 {
- return nil
- }
- if len(nonNilNodes) == 1 {
- return nonNilNodes[0]
- }
- concat := newConcatNode(nonNilNodes[0], nonNilNodes[1])
- for _, c := range nonNilNodes[2:] {
- concat = newConcatNode(concat, c)
- }
- return concat
-}
-
-func genAltNode(cs ...CPTree) CPTree {
- nonNilNodes := []CPTree{}
- for _, c := range cs {
- if c == nil {
- continue
- }
- nonNilNodes = append(nonNilNodes, c)
- }
- if len(nonNilNodes) <= 0 {
- return nil
- }
- if len(nonNilNodes) == 1 {
- return nonNilNodes[0]
- }
- alt := newAltNode(nonNilNodes[0], nonNilNodes[1])
- for _, c := range nonNilNodes[2:] {
- alt = newAltNode(alt, c)
- }
- return alt
-}
-
-func (p *parser) expect(expected tokenKind) {
- if !p.consume(expected) {
- tok := p.peekedTok
- p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("expected: %v, actual: %v", expected, tok.kind))
- }
-}
-
-func (p *parser) consume(expected tokenKind) bool {
- var tok *token
- var err error
- if p.peekedTok != nil {
- tok = p.peekedTok
- p.peekedTok = nil
- } else {
- tok, err = p.lex.next()
- if err != nil {
- if err == ParseErr {
- detail, cause := p.lex.error()
- p.raiseParseError(cause, detail)
- }
- panic(err)
- }
- }
- p.lastTok = tok
- if tok.kind == expected {
- return true
- }
- p.peekedTok = tok
- p.lastTok = nil
-
- return false
-}
-
-func (p *parser) raiseParseError(err error, detail string) {
- p.errCause = err
- p.errDetail = detail
- panic(ParseErr)
-}
diff --git a/grammar/lexical/parser/parser_test.go b/grammar/lexical/parser/parser_test.go
deleted file mode 100644
index e876d3b..0000000
--- a/grammar/lexical/parser/parser_test.go
+++ /dev/null
@@ -1,1389 +0,0 @@
-package parser
-
-import (
- "fmt"
- "reflect"
- "strings"
- "testing"
-
- spec "spec/grammar"
- "ucd"
-)
-
-func TestParse(t *testing.T) {
- tests := []struct {
- pattern string
- fragments map[spec.LexKindName]string
- ast CPTree
- syntaxError error
-
- // When an AST is large, as patterns containing a character property expression, this test only checks
- // that the pattern is parsable. The check of the validity of such AST is performed by checking that it
- // can be matched correctly using the driver.
- skipTestAST bool
- }{
- {
- pattern: "a",
- ast: newSymbolNode('a'),
- },
- {
- pattern: "abc",
- ast: genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- },
- {
- pattern: "a?",
- ast: newOptionNode(
- newSymbolNode('a'),
- ),
- },
- {
- pattern: "[abc]?",
- ast: newOptionNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- },
- {
- pattern: "\\u{3042}?",
- ast: newOptionNode(
- newSymbolNode('\u3042'),
- ),
- },
- {
- pattern: "\\p{Letter}?",
- skipTestAST: true,
- },
- {
- pattern: "\\f{a2c}?",
- fragments: map[spec.LexKindName]string{
- "a2c": "abc",
- },
- ast: newOptionNode(
- newFragmentNode("a2c",
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- ),
- },
- {
- pattern: "(a)?",
- ast: newOptionNode(
- newSymbolNode('a'),
- ),
- },
- {
- pattern: "((a?)?)?",
- ast: newOptionNode(
- newOptionNode(
- newOptionNode(
- newSymbolNode('a'),
- ),
- ),
- ),
- },
- {
- pattern: "(abc)?",
- ast: newOptionNode(
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- },
- {
- pattern: "(a|b)?",
- ast: newOptionNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- ),
- ),
- },
- {
- pattern: "?",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "(?)",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a|?",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "?|b",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a??",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a*",
- ast: newRepeatNode(
- newSymbolNode('a'),
- ),
- },
- {
- pattern: "[abc]*",
- ast: newRepeatNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- },
- {
- pattern: "\\u{3042}*",
- ast: newRepeatNode(
- newSymbolNode('\u3042'),
- ),
- },
- {
- pattern: "\\p{Letter}*",
- skipTestAST: true,
- },
- {
- pattern: "\\f{a2c}*",
- fragments: map[spec.LexKindName]string{
- "a2c": "abc",
- },
- ast: newRepeatNode(
- newFragmentNode("a2c",
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- ),
- },
- {
- pattern: "((a*)*)*",
- ast: newRepeatNode(
- newRepeatNode(
- newRepeatNode(
- newSymbolNode('a'),
- ),
- ),
- ),
- },
- {
- pattern: "(abc)*",
- ast: newRepeatNode(
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- },
- {
- pattern: "(a|b)*",
- ast: newRepeatNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- ),
- ),
- },
- {
- pattern: "*",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "(*)",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a|*",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "*|b",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a**",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a+",
- ast: genConcatNode(
- newSymbolNode('a'),
- newRepeatNode(
- newSymbolNode('a'),
- ),
- ),
- },
- {
- pattern: "[abc]+",
- ast: genConcatNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- newRepeatNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- ),
- },
- {
- pattern: "\\u{3042}+",
- ast: genConcatNode(
- newSymbolNode('\u3042'),
- newRepeatNode(
- newSymbolNode('\u3042'),
- ),
- ),
- },
- {
- pattern: "\\p{Letter}+",
- skipTestAST: true,
- },
- {
- pattern: "\\f{a2c}+",
- fragments: map[spec.LexKindName]string{
- "a2c": "abc",
- },
- ast: genConcatNode(
- newFragmentNode("a2c",
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- newRepeatNode(
- newFragmentNode("a2c",
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- ),
- ),
- },
- {
- pattern: "((a+)+)+",
- ast: genConcatNode(
- genConcatNode(
- genConcatNode(
- genConcatNode(
- newSymbolNode('a'),
- newRepeatNode(
- newSymbolNode('a'),
- ),
- ),
- newRepeatNode(
- genConcatNode(
- newSymbolNode('a'),
- newRepeatNode(
- newSymbolNode('a'),
- ),
- ),
- ),
- ),
- newRepeatNode(
- genConcatNode(
- genConcatNode(
- newSymbolNode('a'),
- newRepeatNode(
- newSymbolNode('a'),
- ),
- ),
- newRepeatNode(
- genConcatNode(
- newSymbolNode('a'),
- newRepeatNode(
- newSymbolNode('a'),
- ),
- ),
- ),
- ),
- ),
- ),
- ),
- },
- {
- pattern: "(abc)+",
- ast: genConcatNode(
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- newRepeatNode(
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- ),
- },
- {
- pattern: "(a|b)+",
- ast: genConcatNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- ),
- newRepeatNode(
- genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- ),
- ),
- ),
- },
- {
- pattern: "+",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "(+)",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a|+",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "+|b",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: "a++",
- syntaxError: synErrRepNoTarget,
- },
- {
- pattern: ".",
- ast: newRangeSymbolNode(0x00, 0x10FFFF),
- },
- {
- pattern: "[a]",
- ast: newSymbolNode('a'),
- },
- {
- pattern: "[abc]",
- ast: genAltNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- },
- {
- pattern: "[a-z]",
- ast: newRangeSymbolNode('a', 'z'),
- },
- {
- pattern: "[A-Za-z]",
- ast: genAltNode(
- newRangeSymbolNode('A', 'Z'),
- newRangeSymbolNode('a', 'z'),
- ),
- },
- {
- pattern: "[\\u{004E}]",
- ast: newSymbolNode('N'),
- },
- {
- pattern: "[\\u{0061}-\\u{007A}]",
- ast: newRangeSymbolNode('a', 'z'),
- },
- {
- pattern: "[\\p{Lu}]",
- skipTestAST: true,
- },
- {
- pattern: "[a-\\p{Lu}]",
- syntaxError: synErrRangePropIsUnavailable,
- },
- {
- pattern: "[\\p{Lu}-z]",
- syntaxError: synErrRangePropIsUnavailable,
- },
- {
- pattern: "[\\p{Lu}-\\p{Ll}]",
- syntaxError: synErrRangePropIsUnavailable,
- },
- {
- pattern: "[z-a]",
- syntaxError: synErrRangeInvalidOrder,
- },
- {
- pattern: "a[]",
- syntaxError: synErrBExpNoElem,
- },
- {
- pattern: "[]a",
- syntaxError: synErrBExpNoElem,
- },
- {
- pattern: "[]",
- syntaxError: synErrBExpNoElem,
- },
- {
- pattern: "[^\\u{004E}]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, '\u004E'-1),
- newRangeSymbolNode('\u004E'+1, 0x10FFFF),
- ),
- },
- {
- pattern: "[^\\u{0061}-\\u{007A}]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, '\u0061'-1),
- newRangeSymbolNode('\u007A'+1, 0x10FFFF),
- ),
- },
- {
- pattern: "[^\\p{Lu}]",
- skipTestAST: true,
- },
- {
- pattern: "[^a-\\p{Lu}]",
- syntaxError: synErrRangePropIsUnavailable,
- },
- {
- pattern: "[^\\p{Lu}-z]",
- syntaxError: synErrRangePropIsUnavailable,
- },
- {
- pattern: "[^\\p{Lu}-\\p{Ll}]",
- syntaxError: synErrRangePropIsUnavailable,
- },
- {
- pattern: "[^\\u{0000}-\\u{10FFFF}]",
- syntaxError: synErrUnmatchablePattern,
- },
- {
- pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]",
- syntaxError: synErrUnmatchablePattern,
- },
- {
- pattern: "[^]",
- ast: newSymbolNode('^'),
- },
- {
- pattern: "[",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "[a",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([a",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "[a-",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([a-",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "[^",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([^",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "[^a",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([^a",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "[^a-",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([^a-",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "]",
- ast: newSymbolNode(']'),
- },
- {
- pattern: "(]",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: "a]",
- ast: genConcatNode(
- newSymbolNode('a'),
- newSymbolNode(']'),
- ),
- },
- {
- pattern: "(a]",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: "([)",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "([a)",
- syntaxError: synErrBExpUnclosed,
- },
- {
- pattern: "[a-]",
- ast: genAltNode(
- newSymbolNode('a'),
- newSymbolNode('-'),
- ),
- },
- {
- pattern: "[^a-]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, 0x2C),
- newRangeSymbolNode(0x2E, 0x60),
- newRangeSymbolNode(0x62, 0x10FFFF),
- ),
- },
- {
- pattern: "[-z]",
- ast: genAltNode(
- newSymbolNode('-'),
- newSymbolNode('z'),
- ),
- },
- {
- pattern: "[^-z]",
- ast: newAltNode(
- newRangeSymbolNode(0x00, 0x2C),
- newAltNode(
- newRangeSymbolNode(0x2E, 0x79),
- newRangeSymbolNode(0x7B, 0x10FFFF),
- ),
- ),
- },
- {
- pattern: "[-]",
- ast: newSymbolNode('-'),
- },
- {
- pattern: "[^-]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, 0x2C),
- newRangeSymbolNode(0x2E, 0x10FFFF),
- ),
- },
- {
- pattern: "[^01]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, '0'-1),
- newRangeSymbolNode('1'+1, 0x10FFFF),
- ),
- },
- {
- pattern: "[^10]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, '0'-1),
- newRangeSymbolNode('1'+1, 0x10FFFF),
- ),
- },
- {
- pattern: "[^a-z]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, 'a'-1),
- newRangeSymbolNode('z'+1, 0x10FFFF),
- ),
- },
- {
- pattern: "[^az]",
- ast: genAltNode(
- newRangeSymbolNode(0x00, 'a'-1),
- genAltNode(
- newRangeSymbolNode('a'+1, 'z'-1),
- newRangeSymbolNode('z'+1, 0x10FFFF),
- ),
- ),
- },
- {
- pattern: "\\u{006E}",
- ast: newSymbolNode('\u006E'),
- },
- {
- pattern: "\\u{03BD}",
- ast: newSymbolNode('\u03BD'),
- },
- {
- pattern: "\\u{306B}",
- ast: newSymbolNode('\u306B'),
- },
- {
- pattern: "\\u{01F638}",
- ast: newSymbolNode('\U0001F638'),
- },
- {
- pattern: "\\u{0000}",
- ast: newSymbolNode('\u0000'),
- },
- {
- pattern: "\\u{10FFFF}",
- ast: newSymbolNode('\U0010FFFF'),
- },
- {
- pattern: "\\u{110000}",
- syntaxError: synErrCPExpOutOfRange,
- },
- {
- pattern: "\\u",
- syntaxError: synErrCPExpInvalidForm,
- },
- {
- pattern: "\\u{",
- syntaxError: synErrCPExpInvalidForm,
- },
- {
- pattern: "\\u{03BD",
- syntaxError: synErrCPExpInvalidForm,
- },
- {
- pattern: "\\u{}",
- syntaxError: synErrCPExpInvalidForm,
- },
- {
- pattern: "\\p{Letter}",
- skipTestAST: true,
- },
- {
- pattern: "\\p{General_Category=Letter}",
- skipTestAST: true,
- },
- {
- pattern: "\\p{ Letter }",
- skipTestAST: true,
- },
- {
- pattern: "\\p{ General_Category = Letter }",
- skipTestAST: true,
- },
- {
- pattern: "\\p",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\p{",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\p{Letter",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\p{General_Category=}",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\p{General_Category= }",
- syntaxError: synErrCharPropInvalidSymbol,
- },
- {
- pattern: "\\p{=Letter}",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\p{ =Letter}",
- syntaxError: synErrCharPropInvalidSymbol,
- },
- {
- pattern: "\\p{=}",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\p{}",
- syntaxError: synErrCharPropExpInvalidForm,
- },
- {
- pattern: "\\f{a2c}",
- fragments: map[spec.LexKindName]string{
- "a2c": "abc",
- },
- ast: newFragmentNode("a2c",
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- },
- {
- pattern: "\\f{ a2c }",
- fragments: map[spec.LexKindName]string{
- "a2c": "abc",
- },
- ast: newFragmentNode("a2c",
- genConcatNode(
- newSymbolNode('a'),
- newSymbolNode('b'),
- newSymbolNode('c'),
- ),
- ),
- },
- {
- pattern: "\\f",
- syntaxError: synErrFragmentExpInvalidForm,
- },
- {
- pattern: "\\f{",
- syntaxError: synErrFragmentExpInvalidForm,
- },
- {
- pattern: "\\f{a2c",
- fragments: map[spec.LexKindName]string{
- "a2c": "abc",
- },
- syntaxError: synErrFragmentExpInvalidForm,
- },
- {
- pattern: "(a)",
- ast: newSymbolNode('a'),
- },
- {
- pattern: "(((a)))",
- ast: newSymbolNode('a'),
- },
- {
- pattern: "a()",
- syntaxError: synErrGroupNoElem,
- },
- {
- pattern: "()a",
- syntaxError: synErrGroupNoElem,
- },
- {
- pattern: "()",
- syntaxError: synErrGroupNoElem,
- },
- {
- pattern: "(",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: "a(",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: "(a",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: "((",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: "((a)",
- syntaxError: synErrGroupUnclosed,
- },
- {
- pattern: ")",
- syntaxError: synErrGroupNoInitiator,
- },
- {
- pattern: "a)",
- syntaxError: synErrGroupNoInitiator,
- },
- {
- pattern: ")a",
- syntaxError: synErrGroupNoInitiator,
- },
- {
- pattern: "))",
- syntaxError: synErrGroupNoInitiator,
- },
- {
- pattern: "(a))",
- syntaxError: synErrGroupNoInitiator,
- },
- {
- pattern: "Mulder|Scully",
- ast: genAltNode(
- genConcatNode(
- newSymbolNode('M'),
- newSymbolNode('u'),
- newSymbolNode('l'),
- newSymbolNode('d'),
- newSymbolNode('e'),
- newSymbolNode('r'),
- ),
- genConcatNode(
- newSymbolNode('S'),
- newSymbolNode('c'),
- newSymbolNode('u'),
- newSymbolNode('l'),
- newSymbolNode('l'),
- newSymbolNode('y'),
- ),
- ),
- },
- {
- pattern: "Langly|Frohike|Byers",
- ast: genAltNode(
- genConcatNode(
- newSymbolNode('L'),
- newSymbolNode('a'),
- newSymbolNode('n'),
- newSymbolNode('g'),
- newSymbolNode('l'),
- newSymbolNode('y'),
- ),
- genConcatNode(
- newSymbolNode('F'),
- newSymbolNode('r'),
- newSymbolNode('o'),
- newSymbolNode('h'),
- newSymbolNode('i'),
- newSymbolNode('k'),
- newSymbolNode('e'),
- ),
- genConcatNode(
- newSymbolNode('B'),
- newSymbolNode('y'),
- newSymbolNode('e'),
- newSymbolNode('r'),
- newSymbolNode('s'),
- ),
- ),
- },
- {
- pattern: "|",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "||",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "Mulder|",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "|Scully",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "Langly|Frohike|",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "Langly||Byers",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "|Frohike|Byers",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "|Frohike|",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "Fox(|)Mulder",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "(Fox|)Mulder",
- syntaxError: synErrAltLackOfOperand,
- },
- {
- pattern: "Fox(|Mulder)",
- syntaxError: synErrAltLackOfOperand,
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) {
- fragmentTrees := map[spec.LexKindName]CPTree{}
- for kind, pattern := range tt.fragments {
- p := NewParser(kind, strings.NewReader(pattern))
- root, err := p.Parse()
- if err != nil {
- t.Fatal(err)
- }
-
- fragmentTrees[kind] = root
- }
- err := CompleteFragments(fragmentTrees)
- if err != nil {
- t.Fatal(err)
- }
-
- p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern))
- root, err := p.Parse()
- if tt.syntaxError != nil {
- // printCPTree(os.Stdout, root, "", "")
- if err != ParseErr {
- t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err)
- }
- _, synErr := p.Error()
- if synErr != tt.syntaxError {
- t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr)
- }
- if root != nil {
- t.Fatalf("tree must be nil")
- }
- } else {
- if err != nil {
- detail, cause := p.Error()
- t.Fatalf("%v: %v: %v", err, cause, detail)
- }
- if root == nil {
- t.Fatal("tree must be non-nil")
- }
-
- complete, err := ApplyFragments(root, fragmentTrees)
- if err != nil {
- t.Fatal(err)
- }
- if !complete {
- t.Fatalf("incomplete fragments")
- }
-
- // printCPTree(os.Stdout, root, "", "")
- if !tt.skipTestAST {
- r := root.(*rootNode)
- testAST(t, tt.ast, r.tree)
- }
- }
- })
- }
-}
-
-func TestParse_ContributoryPropertyIsNotExposed(t *testing.T) {
- for _, cProp := range ucd.ContributoryProperties() {
- t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) {
- p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp)))
- root, err := p.Parse()
- if err == nil {
- t.Fatalf("expected syntax error: got: nil")
- }
- _, synErr := p.Error()
- if synErr != synErrCharPropUnsupported {
- t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr)
- }
- if root != nil {
- t.Fatalf("tree is not nil")
- }
- })
- }
-}
-
-func TestExclude(t *testing.T) {
- for _, test := range []struct {
- caption string
- target CPTree
- base CPTree
- result CPTree
- }{
- // t.From > b.From && t.To < b.To
-
- // |t.From - b.From| = 1
- // |b.To - t.To| = 1
- //
- // Target (t): +--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+ +--+
- {
- caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1",
- target: newSymbolNode('1'),
- base: newRangeSymbolNode('0', '2'),
- result: newAltNode(
- newSymbolNode('0'),
- newSymbolNode('2'),
- ),
- },
- // |t.From - b.From| > 1
- // |b.To - t.To| > 1
- //
- // Target (t): +--+
- // Base (b): +--+--+--+--+--+
- // Result (b - t): +--+--+ +--+--+
- {
- caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1",
- target: newSymbolNode('2'),
- base: newRangeSymbolNode('0', '4'),
- result: newAltNode(
- newRangeSymbolNode('0', '1'),
- newRangeSymbolNode('3', '4'),
- ),
- },
-
- // t.From <= b.From && t.To >= b.From && t.To < b.To
-
- // |b.From - t.From| = 0
- // |t.To - b.From| = 0
- // |b.To - t.To| = 1
- //
- // Target (t): +--+
- // Base (b): +--+--+
- // Result (b - t): +--+
- {
- caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
- target: newSymbolNode('0'),
- base: newRangeSymbolNode('0', '1'),
- result: newSymbolNode('1'),
- },
- // |b.From - t.From| = 0
- // |t.To - b.From| = 0
- // |b.To - t.To| > 1
- //
- // Target (t): +--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
- target: newSymbolNode('0'),
- base: newRangeSymbolNode('0', '2'),
- result: newRangeSymbolNode('1', '2'),
- },
- // |b.From - t.From| = 0
- // |t.To - b.From| > 0
- // |b.To - t.To| = 1
- //
- // Target (t): +--+--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+
- {
- caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
- target: newRangeSymbolNode('0', '1'),
- base: newRangeSymbolNode('0', '2'),
- result: newSymbolNode('2'),
- },
- // |b.From - t.From| = 0
- // |t.To - b.From| > 0
- // |b.To - t.To| > 1
- //
- // Target (t): +--+--+
- // Base (b): +--+--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
- target: newRangeSymbolNode('0', '1'),
- base: newRangeSymbolNode('0', '3'),
- result: newRangeSymbolNode('2', '3'),
- },
- // |b.From - t.From| > 0
- // |t.To - b.From| = 0
- // |b.To - t.To| = 1
- //
- // Target (t): +--+--+
- // Base (b): +--+--+
- // Result (b - t): +--+
- {
- caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1",
- target: newRangeSymbolNode('0', '1'),
- base: newRangeSymbolNode('1', '2'),
- result: newSymbolNode('2'),
- },
- // |b.From - t.From| > 0
- // |t.To - b.From| = 0
- // |b.To - t.To| > 1
- //
- // Target (t): +--+--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1",
- target: newRangeSymbolNode('0', '1'),
- base: newRangeSymbolNode('1', '3'),
- result: newRangeSymbolNode('2', '3'),
- },
- // |b.From - t.From| > 0
- // |t.To - b.From| > 0
- // |b.To - t.To| = 1
- //
- // Target (t): +--+--+--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+
- {
- caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1",
- target: newRangeSymbolNode('0', '2'),
- base: newRangeSymbolNode('1', '3'),
- result: newSymbolNode('3'),
- },
- // |b.From - t.From| > 0
- // |t.To - b.From| > 0
- // |b.To - t.To| > 1
- //
- // Target (t): +--+--+--+
- // Base (b): +--+--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1",
- target: newRangeSymbolNode('0', '2'),
- base: newRangeSymbolNode('1', '4'),
- result: newRangeSymbolNode('3', '4'),
- },
-
- // t.From > b.From && t.From <= b.To && t.To >= b.To
-
- // |t.From - b.From| = 1
- // |b.To - t.From| = 0
- // |t.To - b.To| = 0
- //
- // Target (t): +--+
- // Base (b): +--+--+
- // Result (b - t): +--+
- {
- caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
- target: newSymbolNode('1'),
- base: newRangeSymbolNode('0', '1'),
- result: newSymbolNode('0'),
- },
- // |t.From - b.From| = 1
- // |b.To - t.From| = 0
- // |t.To - b.To| > 0
- //
- // Target (t): +--+--+
- // Base (b): +--+--+
- // Result (b - t): +--+
- {
- caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
- target: newRangeSymbolNode('1', '2'),
- base: newRangeSymbolNode('0', '1'),
- result: newSymbolNode('0'),
- },
- // |t.From - b.From| = 1
- // |b.To - t.From| > 0
- // |t.To - b.To| = 0
- //
- // Target (t): +--+--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+
- {
- caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
- target: newRangeSymbolNode('1', '2'),
- base: newRangeSymbolNode('0', '2'),
- result: newSymbolNode('0'),
- },
- // |t.From - b.From| = 1
- // |b.To - t.From| > 0
- // |t.To - b.To| > 0
- //
- // Target (t): +--+--+--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+
- {
- caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
- target: newRangeSymbolNode('1', '3'),
- base: newRangeSymbolNode('0', '2'),
- result: newSymbolNode('0'),
- },
- // |t.From - b.From| > 1
- // |b.To - t.From| = 0
- // |t.To - b.To| = 0
- //
- // Target (t): +--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0",
- target: newSymbolNode('2'),
- base: newRangeSymbolNode('0', '2'),
- result: newRangeSymbolNode('0', '1'),
- },
- // |t.From - b.From| > 1
- // |b.To - t.From| = 0
- // |t.To - b.To| > 0
- //
- // Target (t): +--+--+
- // Base (b): +--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0",
- target: newRangeSymbolNode('2', '3'),
- base: newRangeSymbolNode('0', '2'),
- result: newRangeSymbolNode('0', '1'),
- },
- // |t.From - b.From| > 1
- // |b.To - t.From| > 0
- // |t.To - b.To| = 0
- //
- // Target (t): +--+--+
- // Base (b): +--+--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0",
- target: newRangeSymbolNode('2', '3'),
- base: newRangeSymbolNode('0', '3'),
- result: newRangeSymbolNode('0', '1'),
- },
- // |t.From - b.From| > 1
- // |b.To - t.From| > 0
- // |t.To - b.To| > 0
- //
- // Target (t): +--+--+--+
- // Base (b): +--+--+--+--+
- // Result (b - t): +--+--+
- {
- caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0",
- target: newRangeSymbolNode('2', '4'),
- base: newRangeSymbolNode('0', '3'),
- result: newRangeSymbolNode('0', '1'),
- },
-
- // t.From <= b.From && t.To >= b.To
-
- // |b.From - t.From| = 0
- // |t.To - b.To| = 0
- //
- // Target (t): +--+
- // Base (b): +--+
- // Result (b - t): N/A
- {
- caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0",
- target: newSymbolNode('0'),
- base: newSymbolNode('0'),
- result: nil,
- },
- // |b.From - t.From| = 0
- // |t.To - b.To| > 0
- //
- // Target (t): +--+--+
- // Base (b): +--+
- // Result (b - t): N/A
- {
- caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0",
- target: newRangeSymbolNode('0', '1'),
- base: newSymbolNode('0'),
- result: nil,
- },
- // |b.From - t.From| > 0
- // |t.To - b.To| = 0
- //
- // Target (t): +--+--+
- // Base (b): +--+
- // Result (b - t): N/A
- {
- caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0",
- target: newRangeSymbolNode('0', '1'),
- base: newSymbolNode('1'),
- result: nil,
- },
- // |b.From - t.From| > 0
- // |t.To - b.To| > 0
- //
- // Target (t): +--+--+--+
- // Base (b): +--+
- // Result (b - t): N/A
- {
- caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0",
- target: newRangeSymbolNode('0', '2'),
- base: newSymbolNode('1'),
- result: nil,
- },
-
- // Others
-
- // |b.From - t.From| = 1
- //
- // Target (t): +--+
- // Base (b): +--+
- // Result (b - t): +--+
- {
- caption: "|b.From - t.From| = 1",
- target: newSymbolNode('0'),
- base: newSymbolNode('1'),
- result: newSymbolNode('1'),
- },
- // |b.From - t.From| > 1
- //
- // Target (t): +--+
- // Base (b): +--+
- // Result (b - t): +--+
- {
- caption: "|b.From - t.From| > 1",
- target: newSymbolNode('0'),
- base: newSymbolNode('2'),
- result: newSymbolNode('2'),
- },
- // |t.To - b.To| = 1
- //
- // Target (t): +--+
- // Base (b): +--+
- // Result (b - t): +--+
- {
- caption: "|t.To - b.To| = 1",
- target: newSymbolNode('1'),
- base: newSymbolNode('0'),
- result: newSymbolNode('0'),
- },
- // |t.To - b.To| > 1
- //
- // Target (t): +--+
- // Base (b): +--+
- // Result (b - t): +--+
- {
- caption: "|t.To - b.To| > 1",
- target: newSymbolNode('2'),
- base: newSymbolNode('0'),
- result: newSymbolNode('0'),
- },
- } {
- t.Run(test.caption, func(t *testing.T) {
- r := exclude(test.target, test.base)
- testAST(t, test.result, r)
- })
- }
-}
-
-func testAST(t *testing.T, expected, actual CPTree) {
- t.Helper()
-
- aTy := reflect.TypeOf(actual)
- eTy := reflect.TypeOf(expected)
- if eTy != aTy {
- t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy)
- }
-
- if actual == nil {
- return
- }
-
- switch e := expected.(type) {
- case *symbolNode:
- a := actual.(*symbolNode)
- if a.From != e.From || a.To != e.To {
- t.Fatalf("unexpected node: want: %+v, got: %+v", e, a)
- }
- }
- eLeft, eRight := expected.children()
- aLeft, aRight := actual.children()
- testAST(t, eLeft, aLeft)
- testAST(t, eRight, aRight)
-}
diff --git a/grammar/lexical/parser/tree.go b/grammar/lexical/parser/tree.go
deleted file mode 100644
index 0d64e1d..0000000
--- a/grammar/lexical/parser/tree.go
+++ /dev/null
@@ -1,459 +0,0 @@
-package parser
-
-import (
- "fmt"
- "io"
- "sort"
-
- spec "spec/grammar"
-)
-
-type CPRange struct {
- From rune
- To rune
-}
-
-type CPTree interface {
- fmt.Stringer
- Range() (rune, rune, bool)
- Optional() (CPTree, bool)
- Repeatable() (CPTree, bool)
- Concatenation() (CPTree, CPTree, bool)
- Alternatives() (CPTree, CPTree, bool)
- Describe() (spec.LexKindName, []spec.LexKindName, error)
-
- children() (CPTree, CPTree)
- clone() CPTree
-}
-
-var (
- _ CPTree = &rootNode{}
- _ CPTree = &symbolNode{}
- _ CPTree = &concatNode{}
- _ CPTree = &altNode{}
- _ CPTree = &quantifierNode{}
- _ CPTree = &fragmentNode{}
-)
-
-type rootNode struct {
- kind spec.LexKindName
- tree CPTree
- fragments map[spec.LexKindName][]*fragmentNode
-}
-
-func newRootNode(kind spec.LexKindName, t CPTree) *rootNode {
- fragments := map[spec.LexKindName][]*fragmentNode{}
- collectFragments(t, fragments)
-
- return &rootNode{
- kind: kind,
- tree: t,
- fragments: fragments,
- }
-}
-
-func collectFragments(n CPTree, fragments map[spec.LexKindName][]*fragmentNode) {
- if n == nil {
- return
- }
-
- if f, ok := n.(*fragmentNode); ok {
- fragments[f.kind] = append(fragments[f.kind], f)
- return
- }
-
- l, r := n.children()
- collectFragments(l, fragments)
- collectFragments(r, fragments)
-}
-
-func (n *rootNode) String() string {
- return fmt.Sprintf("root: %v: %v fragments", n.kind, len(n.fragments))
-}
-
-func (n *rootNode) Range() (rune, rune, bool) {
- return n.tree.Range()
-}
-
-func (n *rootNode) Optional() (CPTree, bool) {
- return n.tree.Optional()
-}
-
-func (n *rootNode) Repeatable() (CPTree, bool) {
- return n.tree.Repeatable()
-}
-
-func (n *rootNode) Concatenation() (CPTree, CPTree, bool) {
- return n.tree.Concatenation()
-}
-
-func (n *rootNode) Alternatives() (CPTree, CPTree, bool) {
- return n.tree.Alternatives()
-}
-
-func (n *rootNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
- var frags []spec.LexKindName
- for f := range n.fragments {
- frags = append(frags, spec.LexKindName(f))
- }
- sort.Slice(frags, func(i, j int) bool {
- return frags[i] < frags[j]
- })
-
- return n.kind, frags, nil
-}
-
-func (n *rootNode) children() (CPTree, CPTree) {
- return n.tree.children()
-}
-
-func (n *rootNode) clone() CPTree {
- return n.tree.clone()
-}
-
-func (n *rootNode) incomplete() bool {
- return len(n.fragments) > 0
-}
-
-func (n *rootNode) applyFragment(kind spec.LexKindName, fragment CPTree) error {
- root, ok := fragment.(*rootNode)
- if !ok {
- return fmt.Errorf("applyFragment can take only *rootNode: %T", fragment)
- }
- if root.incomplete() {
- return fmt.Errorf("fragment is incomplete")
- }
-
- fs, ok := n.fragments[kind]
- if !ok {
- return nil
- }
- for _, f := range fs {
- f.tree = root.clone()
- }
- delete(n.fragments, kind)
-
- return nil
-}
-
-type symbolNode struct {
- CPRange
-}
-
-func newSymbolNode(cp rune) *symbolNode {
- return &symbolNode{
- CPRange: CPRange{
- From: cp,
- To: cp,
- },
- }
-}
-
-func newRangeSymbolNode(from, to rune) *symbolNode {
- return &symbolNode{
- CPRange: CPRange{
- From: from,
- To: to,
- },
- }
-}
-
-func (n *symbolNode) String() string {
- return fmt.Sprintf("symbol: %X..%X", n.From, n.To)
-}
-
-func (n *symbolNode) Range() (rune, rune, bool) {
- return n.From, n.To, true
-}
-
-func (n *symbolNode) Optional() (CPTree, bool) {
- return nil, false
-}
-
-func (n *symbolNode) Repeatable() (CPTree, bool) {
- return nil, false
-}
-
-func (n *symbolNode) Concatenation() (CPTree, CPTree, bool) {
- return nil, nil, false
-}
-
-func (n *symbolNode) Alternatives() (CPTree, CPTree, bool) {
- return nil, nil, false
-}
-
-func (n *symbolNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
- return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
-}
-
-func (n *symbolNode) children() (CPTree, CPTree) {
- return nil, nil
-}
-
-func (n *symbolNode) clone() CPTree {
- return newRangeSymbolNode(n.From, n.To)
-}
-
-type concatNode struct {
- left CPTree
- right CPTree
-}
-
-func newConcatNode(left, right CPTree) *concatNode {
- return &concatNode{
- left: left,
- right: right,
- }
-}
-
-func (n *concatNode) String() string {
- return "concat"
-}
-
-func (n *concatNode) Range() (rune, rune, bool) {
- return 0, 0, false
-}
-
-func (n *concatNode) Optional() (CPTree, bool) {
- return nil, false
-}
-
-func (n *concatNode) Repeatable() (CPTree, bool) {
- return nil, false
-}
-
-func (n *concatNode) Concatenation() (CPTree, CPTree, bool) {
- return n.left, n.right, true
-}
-
-func (n *concatNode) Alternatives() (CPTree, CPTree, bool) {
- return nil, nil, false
-}
-
-func (n *concatNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
- return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
-}
-
-func (n *concatNode) children() (CPTree, CPTree) {
- return n.left, n.right
-}
-
-func (n *concatNode) clone() CPTree {
- if n == nil {
- return nil
- }
- return newConcatNode(n.left.clone(), n.right.clone())
-}
-
-type altNode struct {
- left CPTree
- right CPTree
-}
-
-func newAltNode(left, right CPTree) *altNode {
- return &altNode{
- left: left,
- right: right,
- }
-}
-
-func (n *altNode) String() string {
- return "alt"
-}
-
-func (n *altNode) Range() (rune, rune, bool) {
- return 0, 0, false
-}
-
-func (n *altNode) Optional() (CPTree, bool) {
- return nil, false
-}
-
-func (n *altNode) Repeatable() (CPTree, bool) {
- return nil, false
-}
-
-func (n *altNode) Concatenation() (CPTree, CPTree, bool) {
- return nil, nil, false
-}
-
-func (n *altNode) Alternatives() (CPTree, CPTree, bool) {
- return n.left, n.right, true
-}
-
-func (n *altNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
- return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
-}
-
-func (n *altNode) children() (CPTree, CPTree) {
- return n.left, n.right
-}
-
-func (n *altNode) clone() CPTree {
- return newAltNode(n.left.clone(), n.right.clone())
-}
-
-type quantifierNode struct {
- optional bool
- repeatable bool
- tree CPTree
-}
-
-func (n *quantifierNode) String() string {
- switch {
- case n.repeatable:
- return "repeatable (>= 0 times)"
- case n.optional:
- return "optional (0 or 1 times)"
- default:
- return "invalid quantifier"
- }
-}
-
-func newRepeatNode(t CPTree) *quantifierNode {
- return &quantifierNode{
- repeatable: true,
- tree: t,
- }
-}
-
-func newRepeatOneOrMoreNode(t CPTree) *concatNode {
- return newConcatNode(
- t,
- &quantifierNode{
- repeatable: true,
- tree: t.clone(),
- })
-}
-
-func newOptionNode(t CPTree) *quantifierNode {
- return &quantifierNode{
- optional: true,
- tree: t,
- }
-}
-
-func (n *quantifierNode) Range() (rune, rune, bool) {
- return 0, 0, false
-}
-
-func (n *quantifierNode) Optional() (CPTree, bool) {
- return n.tree, n.optional
-}
-
-func (n *quantifierNode) Repeatable() (CPTree, bool) {
- return n.tree, n.repeatable
-}
-
-func (n *quantifierNode) Concatenation() (CPTree, CPTree, bool) {
- return nil, nil, false
-}
-
-func (n *quantifierNode) Alternatives() (CPTree, CPTree, bool) {
- return nil, nil, false
-}
-
-func (n *quantifierNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
- return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
-}
-
-func (n *quantifierNode) children() (CPTree, CPTree) {
- return n.tree, nil
-}
-
-func (n *quantifierNode) clone() CPTree {
- if n.repeatable {
- return newRepeatNode(n.tree.clone())
- }
- return newOptionNode(n.tree.clone())
-}
-
-type fragmentNode struct {
- kind spec.LexKindName
- tree CPTree
-}
-
-func newFragmentNode(kind spec.LexKindName, t CPTree) *fragmentNode {
- return &fragmentNode{
- kind: kind,
- tree: t,
- }
-}
-
-func (n *fragmentNode) String() string {
- return fmt.Sprintf("fragment: %v", n.kind)
-}
-
-func (n *fragmentNode) Range() (rune, rune, bool) {
- return n.tree.Range()
-}
-
-func (n *fragmentNode) Optional() (CPTree, bool) {
- return n.tree.Optional()
-}
-
-func (n *fragmentNode) Repeatable() (CPTree, bool) {
- return n.tree.Repeatable()
-}
-
-func (n *fragmentNode) Concatenation() (CPTree, CPTree, bool) {
- return n.tree.Concatenation()
-}
-
-func (n *fragmentNode) Alternatives() (CPTree, CPTree, bool) {
- return n.tree.Alternatives()
-}
-
-func (n *fragmentNode) Describe() (spec.LexKindName, []spec.LexKindName, error) {
- return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n)
-}
-
-func (n *fragmentNode) children() (CPTree, CPTree) {
- return n.tree.children()
-}
-
-func (n *fragmentNode) clone() CPTree {
- if n.tree == nil {
- return newFragmentNode(n.kind, nil)
- }
- return newFragmentNode(n.kind, n.tree.clone())
-}
-
-//nolint:unused
-func printCPTree(w io.Writer, t CPTree, ruledLine string, childRuledLinePrefix string) {
- if t == nil {
- return
- }
- fmt.Fprintf(w, "%v%v\n", ruledLine, t)
- children := []CPTree{}
- switch n := t.(type) {
- case *rootNode:
- children = append(children, n.tree)
- case *fragmentNode:
- children = append(children, n.tree)
- default:
- left, right := t.children()
- if left != nil {
- children = append(children, left)
- }
- if right != nil {
- children = append(children, right)
- }
- }
- num := len(children)
- for i, child := range children {
- line := "└─ "
- if num > 1 {
- if i == 0 {
- line = "├─ "
- } else if i < num-1 {
- line = "│ "
- }
- }
- prefix := "│ "
- if i >= num-1 {
- prefix = " "
- }
- printCPTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
- }
-}
diff --git a/grammar/lr0.go b/grammar/lr0.go
deleted file mode 100644
index 7e5929a..0000000
--- a/grammar/lr0.go
+++ /dev/null
@@ -1,197 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "sort"
-
- "grammar/symbol"
-)
-
-type lr0Automaton struct {
- initialState kernelID
- states map[kernelID]*lrState
-}
-
-func genLR0Automaton(prods *productionSet, startSym symbol.Symbol, errSym symbol.Symbol) (*lr0Automaton, error) {
- if !startSym.IsStart() {
- return nil, fmt.Errorf("passed symbold is not a start symbol")
- }
-
- automaton := &lr0Automaton{
- states: map[kernelID]*lrState{},
- }
-
- currentState := stateNumInitial
- knownKernels := map[kernelID]struct{}{}
- uncheckedKernels := []*kernel{}
-
- // Generate an initial kernel.
- {
- prods, _ := prods.findByLHS(startSym)
- initialItem, err := newLR0Item(prods[0], 0)
- if err != nil {
- return nil, err
- }
-
- k, err := newKernel([]*lrItem{initialItem})
- if err != nil {
- return nil, err
- }
-
- automaton.initialState = k.id
- knownKernels[k.id] = struct{}{}
- uncheckedKernels = append(uncheckedKernels, k)
- }
-
- for len(uncheckedKernels) > 0 {
- nextUncheckedKernels := []*kernel{}
- for _, k := range uncheckedKernels {
- state, neighbours, err := genStateAndNeighbourKernels(k, prods, errSym)
- if err != nil {
- return nil, err
- }
- state.num = currentState
- currentState = currentState.next()
-
- automaton.states[state.id] = state
-
- for _, k := range neighbours {
- if _, known := knownKernels[k.id]; known {
- continue
- }
- knownKernels[k.id] = struct{}{}
- nextUncheckedKernels = append(nextUncheckedKernels, k)
- }
- }
- uncheckedKernels = nextUncheckedKernels
- }
-
- return automaton, nil
-}
-
-func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol.Symbol) (*lrState, []*kernel, error) {
- items, err := genLR0Closure(k, prods)
- if err != nil {
- return nil, nil, err
- }
- neighbours, err := genNeighbourKernels(items, prods)
- if err != nil {
- return nil, nil, err
- }
-
- next := map[symbol.Symbol]kernelID{}
- kernels := []*kernel{}
- for _, n := range neighbours {
- next[n.symbol] = n.kernel.id
- kernels = append(kernels, n.kernel)
- }
-
- reducible := map[productionID]struct{}{}
- var emptyProdItems []*lrItem
- isErrorTrapper := false
- for _, item := range items {
- if item.dottedSymbol == errSym {
- isErrorTrapper = true
- }
-
- if item.reducible {
- reducible[item.prod] = struct{}{}
-
- prod, ok := prods.findByID(item.prod)
- if !ok {
- return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod)
- }
- if prod.isEmpty() {
- emptyProdItems = append(emptyProdItems, item)
- }
- }
- }
-
- return &lrState{
- kernel: k,
- next: next,
- reducible: reducible,
- emptyProdItems: emptyProdItems,
- isErrorTrapper: isErrorTrapper,
- }, kernels, nil
-}
-
-func genLR0Closure(k *kernel, prods *productionSet) ([]*lrItem, error) {
- items := []*lrItem{}
- knownItems := map[lrItemID]struct{}{}
- uncheckedItems := []*lrItem{}
- for _, item := range k.items {
- items = append(items, item)
- uncheckedItems = append(uncheckedItems, item)
- }
- for len(uncheckedItems) > 0 {
- nextUncheckedItems := []*lrItem{}
- for _, item := range uncheckedItems {
- if item.dottedSymbol.IsTerminal() {
- continue
- }
-
- ps, _ := prods.findByLHS(item.dottedSymbol)
- for _, prod := range ps {
- item, err := newLR0Item(prod, 0)
- if err != nil {
- return nil, err
- }
- if _, exist := knownItems[item.id]; exist {
- continue
- }
- items = append(items, item)
- knownItems[item.id] = struct{}{}
- nextUncheckedItems = append(nextUncheckedItems, item)
- }
- }
- uncheckedItems = nextUncheckedItems
- }
-
- return items, nil
-}
-
-type neighbourKernel struct {
- symbol symbol.Symbol
- kernel *kernel
-}
-
-func genNeighbourKernels(items []*lrItem, prods *productionSet) ([]*neighbourKernel, error) {
- kItemMap := map[symbol.Symbol][]*lrItem{}
- for _, item := range items {
- if item.dottedSymbol.IsNil() {
- continue
- }
- prod, ok := prods.findByID(item.prod)
- if !ok {
- return nil, fmt.Errorf("a production was not found: %v", item.prod)
- }
- kItem, err := newLR0Item(prod, item.dot+1)
- if err != nil {
- return nil, err
- }
- kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem)
- }
-
- nextSyms := []symbol.Symbol{}
- for sym := range kItemMap {
- nextSyms = append(nextSyms, sym)
- }
- sort.Slice(nextSyms, func(i, j int) bool {
- return nextSyms[i] < nextSyms[j]
- })
-
- kernels := []*neighbourKernel{}
- for _, sym := range nextSyms {
- k, err := newKernel(kItemMap[sym])
- if err != nil {
- return nil, err
- }
- kernels = append(kernels, &neighbourKernel{
- symbol: sym,
- kernel: k,
- })
- }
-
- return kernels, nil
-}
diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go
deleted file mode 100644
index 798c2fa..0000000
--- a/grammar/lr0_test.go
+++ /dev/null
@@ -1,448 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "grammar/symbol"
- "spec/grammar/parser"
-)
-
-type expectedLRState struct {
- kernelItems []*lrItem
- nextStates map[symbol.Symbol][]*lrItem
- reducibleProds []*production
- emptyProdItems []*lrItem
-}
-
-func TestGenLR0Automaton(t *testing.T) {
- src := `
-#name test;
-
-expr
- : expr add term
- | term
- ;
-term
- : term mul factor
- | factor
- ;
-factor
- : l_paren expr r_paren
- | id
- ;
-add: "\+";
-mul: "\*";
-l_paren: "\(";
-r_paren: "\)";
-id: "[A-Za-z_][0-9A-Za-z_]*";
-`
-
- var gram *Grammar
- var automaton *lr0Automaton
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
-
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatalf("failed to create a LR0 automaton: %v", err)
- }
- if automaton == nil {
- t.Fatalf("genLR0Automaton returns nil without any error")
- }
- }
-
- initialState := automaton.states[automaton.initialState]
- if initialState == nil {
- t.Errorf("failed to get an initial status: %v", automaton.initialState)
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- genLR0Item("expr'", 0, "expr"),
- },
- 1: {
- genLR0Item("expr'", 1, "expr"),
- genLR0Item("expr", 1, "expr", "add", "term"),
- },
- 2: {
- genLR0Item("expr", 1, "term"),
- genLR0Item("term", 1, "term", "mul", "factor"),
- },
- 3: {
- genLR0Item("term", 1, "factor"),
- },
- 4: {
- genLR0Item("factor", 1, "l_paren", "expr", "r_paren"),
- },
- 5: {
- genLR0Item("factor", 1, "id"),
- },
- 6: {
- genLR0Item("expr", 2, "expr", "add", "term"),
- },
- 7: {
- genLR0Item("term", 2, "term", "mul", "factor"),
- },
- 8: {
- genLR0Item("expr", 1, "expr", "add", "term"),
- genLR0Item("factor", 2, "l_paren", "expr", "r_paren"),
- },
- 9: {
- genLR0Item("expr", 3, "expr", "add", "term"),
- genLR0Item("term", 1, "term", "mul", "factor"),
- },
- 10: {
- genLR0Item("term", 3, "term", "mul", "factor"),
- },
- 11: {
- genLR0Item("factor", 3, "l_paren", "expr", "r_paren"),
- },
- }
-
- expectedStates := []*expectedLRState{
- {
- kernelItems: expectedKernels[0],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("expr"): expectedKernels[1],
- genSym("term"): expectedKernels[2],
- genSym("factor"): expectedKernels[3],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[1],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("add"): expectedKernels[6],
- },
- reducibleProds: []*production{
- genProd("expr'", "expr"),
- },
- },
- {
- kernelItems: expectedKernels[2],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("mul"): expectedKernels[7],
- },
- reducibleProds: []*production{
- genProd("expr", "term"),
- },
- },
- {
- kernelItems: expectedKernels[3],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("term", "factor"),
- },
- },
- {
- kernelItems: expectedKernels[4],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("expr"): expectedKernels[8],
- genSym("term"): expectedKernels[2],
- genSym("factor"): expectedKernels[3],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[5],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("factor", "id"),
- },
- },
- {
- kernelItems: expectedKernels[6],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("term"): expectedKernels[9],
- genSym("factor"): expectedKernels[3],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[7],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("factor"): expectedKernels[10],
- genSym("l_paren"): expectedKernels[4],
- genSym("id"): expectedKernels[5],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[8],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("add"): expectedKernels[6],
- genSym("r_paren"): expectedKernels[11],
- },
- reducibleProds: []*production{},
- },
- {
- kernelItems: expectedKernels[9],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("mul"): expectedKernels[7],
- },
- reducibleProds: []*production{
- genProd("expr", "expr", "add", "term"),
- },
- },
- {
- kernelItems: expectedKernels[10],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("term", "term", "mul", "factor"),
- },
- },
- {
- kernelItems: expectedKernels[11],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("factor", "l_paren", "expr", "r_paren"),
- },
- },
- }
-
- testLRAutomaton(t, expectedStates, automaton)
-}
-
-func TestLR0AutomatonContainingEmptyProduction(t *testing.T) {
- src := `
-#name test;
-
-s
- : foo bar
- ;
-foo
- :
- ;
-bar
- : b
- |
- ;
-
-b: "bar";
-`
-
- var gram *Grammar
- var automaton *lr0Automaton
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
-
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
-
- automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatalf("failed to create a LR0 automaton: %v", err)
- }
- if automaton == nil {
- t.Fatalf("genLR0Automaton returns nil without any error")
- }
- }
-
- initialState := automaton.states[automaton.initialState]
- if initialState == nil {
- t.Errorf("failed to get an initial status: %v", automaton.initialState)
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- genLR0Item("s'", 0, "s"),
- },
- 1: {
- genLR0Item("s'", 1, "s"),
- },
- 2: {
- genLR0Item("s", 1, "foo", "bar"),
- },
- 3: {
- genLR0Item("s", 2, "foo", "bar"),
- },
- 4: {
- genLR0Item("bar", 1, "b"),
- },
- }
-
- expectedStates := []*expectedLRState{
- {
- kernelItems: expectedKernels[0],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("s"): expectedKernels[1],
- genSym("foo"): expectedKernels[2],
- },
- reducibleProds: []*production{
- genProd("foo"),
- },
- emptyProdItems: []*lrItem{
- genLR0Item("foo", 0),
- },
- },
- {
- kernelItems: expectedKernels[1],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s'", "s"),
- },
- },
- {
- kernelItems: expectedKernels[2],
- nextStates: map[symbol.Symbol][]*lrItem{
- genSym("bar"): expectedKernels[3],
- genSym("b"): expectedKernels[4],
- },
- reducibleProds: []*production{
- genProd("bar"),
- },
- emptyProdItems: []*lrItem{
- genLR0Item("bar", 0),
- },
- },
- {
- kernelItems: expectedKernels[3],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("s", "foo", "bar"),
- },
- },
- {
- kernelItems: expectedKernels[4],
- nextStates: map[symbol.Symbol][]*lrItem{},
- reducibleProds: []*production{
- genProd("bar", "b"),
- },
- },
- }
-
- testLRAutomaton(t, expectedStates, automaton)
-}
-
-func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) {
- if len(automaton.states) != len(expected) {
- t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states))
- }
-
- for i, eState := range expected {
- t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) {
- k, err := newKernel(eState.kernelItems)
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
-
- state, ok := automaton.states[k.id]
- if !ok {
- t.Fatalf("a kernel was not found: %v", k.id)
- }
-
- // test look-ahead symbols
- {
- if len(state.kernel.items) != len(eState.kernelItems) {
- t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items))
- }
- for _, eKItem := range eState.kernelItems {
- var kItem *lrItem
- for _, it := range state.kernel.items {
- if it.id != eKItem.id {
- continue
- }
- kItem = it
- break
- }
- if kItem == nil {
- t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id)
- }
-
- if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) {
- t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols))
- }
-
- for eSym := range eKItem.lookAhead.symbols {
- if _, ok := kItem.lookAhead.symbols[eSym]; !ok {
- t.Errorf("look-ahead symbol not found: %v", eSym)
- }
- }
- }
- }
-
- // test next states
- {
- if len(state.next) != len(eState.nextStates) {
- t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next))
- }
- for eSym, eKItems := range eState.nextStates {
- nextStateKernel, err := newKernel(eKItems)
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
- nextState, ok := state.next[eSym]
- if !ok {
- t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym)
- }
- if nextState != nextStateKernel.id {
- t.Fatalf("a kernel ID of the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState)
- }
- }
- }
-
- // test reducible productions
- {
- if len(state.reducible) != len(eState.reducibleProds) {
- t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible))
- }
- for _, eProd := range eState.reducibleProds {
- if _, ok := state.reducible[eProd.id]; !ok {
- t.Errorf("reducible production was not found: %v", eProd.id)
- }
- }
-
- if len(state.emptyProdItems) != len(eState.emptyProdItems) {
- t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems))
- }
- for _, eItem := range eState.emptyProdItems {
- found := false
- for _, item := range state.emptyProdItems {
- if item.id != eItem.id {
- continue
- }
- found = true
- break
- }
- if !found {
- t.Errorf("empty production item not found: %v", eItem.id)
- }
- }
- }
- })
- }
-}
diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go
deleted file mode 100644
index fc520cc..0000000
--- a/grammar/parsing_table.go
+++ /dev/null
@@ -1,553 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "sort"
-
- "grammar/symbol"
- spec "spec/grammar"
-)
-
-type ActionType string
-
-const (
- ActionTypeShift = ActionType("shift")
- ActionTypeReduce = ActionType("reduce")
- ActionTypeError = ActionType("error")
-)
-
-type actionEntry int
-
-const actionEntryEmpty = actionEntry(0)
-
-func newShiftActionEntry(state stateNum) actionEntry {
- return actionEntry(state * -1)
-}
-
-func newReduceActionEntry(prod productionNum) actionEntry {
- return actionEntry(prod)
-}
-
-func (e actionEntry) isEmpty() bool {
- return e == actionEntryEmpty
-}
-
-func (e actionEntry) describe() (ActionType, stateNum, productionNum) {
- if e == actionEntryEmpty {
- return ActionTypeError, stateNumInitial, productionNumNil
- }
- if e < 0 {
- return ActionTypeShift, stateNum(e * -1), productionNumNil
- }
- return ActionTypeReduce, stateNumInitial, productionNum(e)
-}
-
-type GoToType string
-
-const (
- GoToTypeRegistered = GoToType("registered")
- GoToTypeError = GoToType("error")
-)
-
-type goToEntry uint
-
-const goToEntryEmpty = goToEntry(0)
-
-func newGoToEntry(state stateNum) goToEntry {
- return goToEntry(state)
-}
-
-func (e goToEntry) describe() (GoToType, stateNum) {
- if e == goToEntryEmpty {
- return GoToTypeError, stateNumInitial
- }
- return GoToTypeRegistered, stateNum(e)
-}
-
-type conflictResolutionMethod int
-
-func (m conflictResolutionMethod) Int() int {
- return int(m)
-}
-
-const (
- ResolvedByPrec conflictResolutionMethod = 1
- ResolvedByAssoc conflictResolutionMethod = 2
- ResolvedByShift conflictResolutionMethod = 3
- ResolvedByProdOrder conflictResolutionMethod = 4
-)
-
-type conflict interface {
- conflict()
-}
-
-type shiftReduceConflict struct {
- state stateNum
- sym symbol.Symbol
- nextState stateNum
- prodNum productionNum
- resolvedBy conflictResolutionMethod
-}
-
-func (c *shiftReduceConflict) conflict() {
-}
-
-type reduceReduceConflict struct {
- state stateNum
- sym symbol.Symbol
- prodNum1 productionNum
- prodNum2 productionNum
- resolvedBy conflictResolutionMethod
-}
-
-func (c *reduceReduceConflict) conflict() {
-}
-
-var (
- _ conflict = &shiftReduceConflict{}
- _ conflict = &reduceReduceConflict{}
-)
-
-type ParsingTable struct {
- actionTable []actionEntry
- goToTable []goToEntry
- stateCount int
- terminalCount int
- nonTerminalCount int
-
- // errorTrapperStates's index means a state number, and when `errorTrapperStates[stateNum]` is `1`,
- // the state has an item having the following form. The `α` and `β` can be empty.
- //
- // A → α・error β
- errorTrapperStates []int
-
- InitialState stateNum
-}
-
-func (t *ParsingTable) getAction(state stateNum, sym symbol.SymbolNum) (ActionType, stateNum, productionNum) {
- pos := state.Int()*t.terminalCount + sym.Int()
- return t.actionTable[pos].describe()
-}
-
-func (t *ParsingTable) getGoTo(state stateNum, sym symbol.SymbolNum) (GoToType, stateNum) {
- pos := state.Int()*t.nonTerminalCount + sym.Int()
- return t.goToTable[pos].describe()
-}
-
-func (t *ParsingTable) readAction(row int, col int) actionEntry {
- return t.actionTable[row*t.terminalCount+col]
-}
-
-func (t *ParsingTable) writeAction(row int, col int, act actionEntry) {
- t.actionTable[row*t.terminalCount+col] = act
-}
-
-func (t *ParsingTable) writeGoTo(state stateNum, sym symbol.Symbol, nextState stateNum) {
- pos := state.Int()*t.nonTerminalCount + sym.Num().Int()
- t.goToTable[pos] = newGoToEntry(nextState)
-}
-
-type lrTableBuilder struct {
- automaton *lr0Automaton
- prods *productionSet
- termCount int
- nonTermCount int
- symTab *symbol.SymbolTableReader
- precAndAssoc *precAndAssoc
-
- conflicts []conflict
-}
-
-func (b *lrTableBuilder) build() (*ParsingTable, error) {
- var ptab *ParsingTable
- {
- initialState := b.automaton.states[b.automaton.initialState]
- ptab = &ParsingTable{
- actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount),
- goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount),
- stateCount: len(b.automaton.states),
- terminalCount: b.termCount,
- nonTerminalCount: b.nonTermCount,
- errorTrapperStates: make([]int, len(b.automaton.states)),
- InitialState: initialState.num,
- }
- }
-
- for _, state := range b.automaton.states {
- if state.isErrorTrapper {
- ptab.errorTrapperStates[state.num] = 1
- }
-
- for sym, kID := range state.next {
- nextState := b.automaton.states[kID]
- if sym.IsTerminal() {
- b.writeShiftAction(ptab, state.num, sym, nextState.num)
- } else {
- ptab.writeGoTo(state.num, sym, nextState.num)
- }
- }
-
- for prodID := range state.reducible {
- reducibleProd, ok := b.prods.findByID(prodID)
- if !ok {
- return nil, fmt.Errorf("reducible production not found: %v", prodID)
- }
-
- var reducibleItem *lrItem
- for _, item := range state.items {
- if item.prod != reducibleProd.id {
- continue
- }
-
- reducibleItem = item
- break
- }
- if reducibleItem == nil {
- for _, item := range state.emptyProdItems {
- if item.prod != reducibleProd.id {
- continue
- }
-
- reducibleItem = item
- break
- }
- if reducibleItem == nil {
- return nil, fmt.Errorf("reducible item not found; state: %v, production: %v", state.num, reducibleProd.num)
- }
- }
-
- for a := range reducibleItem.lookAhead.symbols {
- b.writeReduceAction(ptab, state.num, a, reducibleProd.num)
- }
- }
- }
-
- return ptab, nil
-}
-
-// writeShiftAction writes a shift action to the parsing table. When a shift/reduce conflict occurred,
-// we prioritize the shift action.
-func (b *lrTableBuilder) writeShiftAction(tab *ParsingTable, state stateNum, sym symbol.Symbol, nextState stateNum) {
- act := tab.readAction(state.Int(), sym.Num().Int())
- if !act.isEmpty() {
- ty, _, p := act.describe()
- if ty == ActionTypeReduce {
- act, method := b.resolveSRConflict(sym.Num(), p)
- b.conflicts = append(b.conflicts, &shiftReduceConflict{
- state: state,
- sym: sym,
- nextState: nextState,
- prodNum: p,
- resolvedBy: method,
- })
- if act == ActionTypeShift {
- tab.writeAction(state.Int(), sym.Num().Int(), newShiftActionEntry(nextState))
- }
- return
- }
- }
- tab.writeAction(state.Int(), sym.Num().Int(), newShiftActionEntry(nextState))
-}
-
-// writeReduceAction writes a reduce action to the parsing table. When a shift/reduce conflict occurred,
-// we prioritize the shift action, and when a reduce/reduce conflict we prioritize the action that reduces
-// the production with higher priority. Productions defined earlier in the grammar file have a higher priority.
-func (b *lrTableBuilder) writeReduceAction(tab *ParsingTable, state stateNum, sym symbol.Symbol, prod productionNum) {
- act := tab.readAction(state.Int(), sym.Num().Int())
- if !act.isEmpty() {
- ty, s, p := act.describe()
- switch ty {
- case ActionTypeReduce:
- if p == prod {
- return
- }
-
- b.conflicts = append(b.conflicts, &reduceReduceConflict{
- state: state,
- sym: sym,
- prodNum1: p,
- prodNum2: prod,
- resolvedBy: ResolvedByProdOrder,
- })
- if p < prod {
- tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(p))
- } else {
- tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod))
- }
- case ActionTypeShift:
- act, method := b.resolveSRConflict(sym.Num(), prod)
- b.conflicts = append(b.conflicts, &shiftReduceConflict{
- state: state,
- sym: sym,
- nextState: s,
- prodNum: prod,
- resolvedBy: method,
- })
- if act == ActionTypeReduce {
- tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod))
- }
- }
- return
- }
- tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod))
-}
-
-func (b *lrTableBuilder) resolveSRConflict(sym symbol.SymbolNum, prod productionNum) (ActionType, conflictResolutionMethod) {
- symPrec := b.precAndAssoc.terminalPrecedence(sym)
- prodPrec := b.precAndAssoc.productionPredence(prod)
- if symPrec == 0 || prodPrec == 0 {
- return ActionTypeShift, ResolvedByShift
- }
- if symPrec == prodPrec {
- assoc := b.precAndAssoc.productionAssociativity(prod)
- if assoc != assocTypeLeft {
- return ActionTypeShift, ResolvedByAssoc
- }
- return ActionTypeReduce, ResolvedByAssoc
- }
- if symPrec < prodPrec {
- return ActionTypeShift, ResolvedByPrec
- }
- return ActionTypeReduce, ResolvedByPrec
-}
-
-func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Report, error) {
- var terms []*spec.Terminal
- {
- termSyms := b.symTab.TerminalSymbols()
- terms = make([]*spec.Terminal, len(termSyms)+1)
-
- for _, sym := range termSyms {
- name, ok := b.symTab.ToText(sym)
- if !ok {
- return nil, fmt.Errorf("failed to generate terminals: symbol not found: %v", sym)
- }
-
- term := &spec.Terminal{
- Number: sym.Num().Int(),
- Name: name,
- }
-
- prec := b.precAndAssoc.terminalPrecedence(sym.Num())
- if prec != precNil {
- term.Precedence = prec
- }
-
- assoc := b.precAndAssoc.terminalAssociativity(sym.Num())
- switch assoc {
- case assocTypeLeft:
- term.Associativity = "l"
- case assocTypeRight:
- term.Associativity = "r"
- }
-
- terms[sym.Num()] = term
- }
- }
-
- var nonTerms []*spec.NonTerminal
- {
- nonTermSyms := b.symTab.NonTerminalSymbols()
- nonTerms = make([]*spec.NonTerminal, len(nonTermSyms)+1)
- for _, sym := range nonTermSyms {
- name, ok := b.symTab.ToText(sym)
- if !ok {
- return nil, fmt.Errorf("failed to generate non-terminals: symbol not found: %v", sym)
- }
-
- nonTerms[sym.Num()] = &spec.NonTerminal{
- Number: sym.Num().Int(),
- Name: name,
- }
- }
- }
-
- var prods []*spec.Production
- {
- ps := gram.productionSet.getAllProductions()
- prods = make([]*spec.Production, len(ps)+1)
- for _, p := range ps {
- rhs := make([]int, len(p.rhs))
- for i, e := range p.rhs {
- if e.IsTerminal() {
- rhs[i] = e.Num().Int()
- } else {
- rhs[i] = e.Num().Int() * -1
- }
- }
-
- prod := &spec.Production{
- Number: p.num.Int(),
- LHS: p.lhs.Num().Int(),
- RHS: rhs,
- }
-
- prec := b.precAndAssoc.productionPredence(p.num)
- if prec != precNil {
- prod.Precedence = prec
- }
-
- assoc := b.precAndAssoc.productionAssociativity(p.num)
- switch assoc {
- case assocTypeLeft:
- prod.Associativity = "l"
- case assocTypeRight:
- prod.Associativity = "r"
- }
-
- prods[p.num.Int()] = prod
- }
- }
-
- var states []*spec.State
- {
- srConflicts := map[stateNum][]*shiftReduceConflict{}
- rrConflicts := map[stateNum][]*reduceReduceConflict{}
- for _, con := range b.conflicts {
- switch c := con.(type) {
- case *shiftReduceConflict:
- srConflicts[c.state] = append(srConflicts[c.state], c)
- case *reduceReduceConflict:
- rrConflicts[c.state] = append(rrConflicts[c.state], c)
- }
- }
-
- states = make([]*spec.State, len(b.automaton.states))
- for _, s := range b.automaton.states {
- kernel := make([]*spec.Item, len(s.items))
- for i, item := range s.items {
- p, ok := b.prods.findByID(item.prod)
- if !ok {
- return nil, fmt.Errorf("failed to generate states: production of kernel item not found: %v", item.prod)
- }
-
- kernel[i] = &spec.Item{
- Production: p.num.Int(),
- Dot: item.dot,
- }
- }
-
- sort.Slice(kernel, func(i, j int) bool {
- if kernel[i].Production < kernel[j].Production {
- return true
- }
- if kernel[i].Production > kernel[j].Production {
- return false
- }
- return kernel[i].Dot < kernel[j].Dot
- })
-
- var shift []*spec.Transition
- var reduce []*spec.Reduce
- var goTo []*spec.Transition
- {
- TERMINALS_LOOP:
- for _, t := range b.symTab.TerminalSymbols() {
- act, next, prod := tab.getAction(s.num, t.Num())
- switch act {
- case ActionTypeShift:
- shift = append(shift, &spec.Transition{
- Symbol: t.Num().Int(),
- State: next.Int(),
- })
- case ActionTypeReduce:
- for _, r := range reduce {
- if r.Production == prod.Int() {
- r.LookAhead = append(r.LookAhead, t.Num().Int())
- continue TERMINALS_LOOP
- }
- }
- reduce = append(reduce, &spec.Reduce{
- LookAhead: []int{t.Num().Int()},
- Production: prod.Int(),
- })
- }
- }
-
- for _, n := range b.symTab.NonTerminalSymbols() {
- ty, next := tab.getGoTo(s.num, n.Num())
- if ty == GoToTypeRegistered {
- goTo = append(goTo, &spec.Transition{
- Symbol: n.Num().Int(),
- State: next.Int(),
- })
- }
- }
-
- sort.Slice(shift, func(i, j int) bool {
- return shift[i].State < shift[j].State
- })
- sort.Slice(reduce, func(i, j int) bool {
- return reduce[i].Production < reduce[j].Production
- })
- sort.Slice(goTo, func(i, j int) bool {
- return goTo[i].State < goTo[j].State
- })
- }
-
- sr := []*spec.SRConflict{}
- rr := []*spec.RRConflict{}
- {
- for _, c := range srConflicts[s.num] {
- conflict := &spec.SRConflict{
- Symbol: c.sym.Num().Int(),
- State: c.nextState.Int(),
- Production: c.prodNum.Int(),
- ResolvedBy: c.resolvedBy.Int(),
- }
-
- ty, s, p := tab.getAction(s.num, c.sym.Num())
- switch ty {
- case ActionTypeShift:
- n := s.Int()
- conflict.AdoptedState = &n
- case ActionTypeReduce:
- n := p.Int()
- conflict.AdoptedProduction = &n
- }
-
- sr = append(sr, conflict)
- }
-
- sort.Slice(sr, func(i, j int) bool {
- return sr[i].Symbol < sr[j].Symbol
- })
-
- for _, c := range rrConflicts[s.num] {
- conflict := &spec.RRConflict{
- Symbol: c.sym.Num().Int(),
- Production1: c.prodNum1.Int(),
- Production2: c.prodNum2.Int(),
- ResolvedBy: c.resolvedBy.Int(),
- }
-
- _, _, p := tab.getAction(s.num, c.sym.Num())
- conflict.AdoptedProduction = p.Int()
-
- rr = append(rr, conflict)
- }
-
- sort.Slice(rr, func(i, j int) bool {
- return rr[i].Symbol < rr[j].Symbol
- })
- }
-
- states[s.num.Int()] = &spec.State{
- Number: s.num.Int(),
- Kernel: kernel,
- Shift: shift,
- Reduce: reduce,
- GoTo: goTo,
- SRConflict: sr,
- RRConflict: rr,
- }
- }
- }
-
- return &spec.Report{
- Terminals: terms,
- NonTerminals: nonTerms,
- Productions: prods,
- States: states,
- }, nil
-}
diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go
deleted file mode 100644
index 098adf9..0000000
--- a/grammar/parsing_table_test.go
+++ /dev/null
@@ -1,387 +0,0 @@
-package grammar
-
-import (
- "fmt"
- "strings"
- "testing"
-
- "grammar/symbol"
- "spec/grammar/parser"
-)
-
-type expectedState struct {
- kernelItems []*lrItem
- acts map[symbol.Symbol]testActionEntry
- goTos map[symbol.Symbol][]*lrItem
-}
-
-func TestGenLALRParsingTable(t *testing.T) {
- src := `
-#name test;
-
-s: l eq r | r;
-l: ref r | id;
-r: l;
-eq: '=';
-ref: '*';
-id: "[A-Za-z0-9_]+";
-`
-
- var ptab *ParsingTable
- var automaton *lalr1Automaton
- var gram *Grammar
- var nonTermCount int
- var termCount int
- {
- ast, err := parser.Parse(strings.NewReader(src))
- if err != nil {
- t.Fatal(err)
- }
- b := GrammarBuilder{
- AST: ast,
- }
- gram, err = b.build()
- if err != nil {
- t.Fatal(err)
- }
- first, err := genFirstSet(gram.productionSet)
- if err != nil {
- t.Fatal(err)
- }
- lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
- if err != nil {
- t.Fatal(err)
- }
- automaton, err = genLALR1Automaton(lr0, gram.productionSet, first)
- if err != nil {
- t.Fatal(err)
- }
-
- nonTermTexts, err := gram.symbolTable.NonTerminalTexts()
- if err != nil {
- t.Fatal(err)
- }
- termTexts, err := gram.symbolTable.TerminalTexts()
- if err != nil {
- t.Fatal(err)
- }
- nonTermCount = len(nonTermTexts)
- termCount = len(termTexts)
-
- lalr := &lrTableBuilder{
- automaton: automaton.lr0Automaton,
- prods: gram.productionSet,
- termCount: termCount,
- nonTermCount: nonTermCount,
- symTab: gram.symbolTable,
- }
- ptab, err = lalr.build()
- if err != nil {
- t.Fatalf("failed to create a LALR parsing table: %v", err)
- }
- if ptab == nil {
- t.Fatal("genLALRParsingTable returns nil without any error")
- }
- }
-
- genSym := newTestSymbolGenerator(t, gram.symbolTable)
- genProd := newTestProductionGenerator(t, genSym)
- genLR0Item := newTestLR0ItemGenerator(t, genProd)
-
- expectedKernels := map[int][]*lrItem{
- 0: {
- withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF),
- },
- 1: {
- withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF),
- },
- 2: {
- withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF),
- withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF),
- },
- 3: {
- withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF),
- },
- 4: {
- withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 5: {
- withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF),
- },
- 6: {
- withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF),
- },
- 7: {
- withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF),
- },
- 8: {
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF),
- },
- 9: {
- withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF),
- },
- }
-
- expectedStates := []expectedState{
- {
- kernelItems: expectedKernels[0],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("ref"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[4],
- },
- genSym("id"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[5],
- },
- },
- goTos: map[symbol.Symbol][]*lrItem{
- genSym("s"): expectedKernels[1],
- genSym("l"): expectedKernels[2],
- genSym("r"): expectedKernels[3],
- },
- },
- {
- kernelItems: expectedKernels[1],
- acts: map[symbol.Symbol]testActionEntry{
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("s'", "s"),
- },
- },
- },
- {
- kernelItems: expectedKernels[2],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[6],
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("r", "l"),
- },
- },
- },
- {
- kernelItems: expectedKernels[3],
- acts: map[symbol.Symbol]testActionEntry{
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("s", "r"),
- },
- },
- },
- {
- kernelItems: expectedKernels[4],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("ref"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[4],
- },
- genSym("id"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[5],
- },
- },
- goTos: map[symbol.Symbol][]*lrItem{
- genSym("r"): expectedKernels[7],
- genSym("l"): expectedKernels[8],
- },
- },
- {
- kernelItems: expectedKernels[5],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeReduce,
- production: genProd("l", "id"),
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("l", "id"),
- },
- },
- },
- {
- kernelItems: expectedKernels[6],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("ref"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[4],
- },
- genSym("id"): {
- ty: ActionTypeShift,
- nextState: expectedKernels[5],
- },
- },
- goTos: map[symbol.Symbol][]*lrItem{
- genSym("l"): expectedKernels[8],
- genSym("r"): expectedKernels[9],
- },
- },
- {
- kernelItems: expectedKernels[7],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeReduce,
- production: genProd("l", "ref", "r"),
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("l", "ref", "r"),
- },
- },
- },
- {
- kernelItems: expectedKernels[8],
- acts: map[symbol.Symbol]testActionEntry{
- genSym("eq"): {
- ty: ActionTypeReduce,
- production: genProd("r", "l"),
- },
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("r", "l"),
- },
- },
- },
- {
- kernelItems: expectedKernels[9],
- acts: map[symbol.Symbol]testActionEntry{
- symbol.SymbolEOF: {
- ty: ActionTypeReduce,
- production: genProd("s", "l", "eq", "r"),
- },
- },
- },
- }
-
- t.Run("initial state", func(t *testing.T) {
- iniState := findStateByNum(automaton.states, ptab.InitialState)
- if iniState == nil {
- t.Fatalf("the initial state was not found: #%v", ptab.InitialState)
- }
- eIniState, err := newKernel(expectedKernels[0])
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
- if iniState.id != eIniState.id {
- t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id)
- }
- })
-
- for i, eState := range expectedStates {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- k, err := newKernel(eState.kernelItems)
- if err != nil {
- t.Fatalf("failed to create a kernel item: %v", err)
- }
- state, ok := automaton.states[k.id]
- if !ok {
- t.Fatalf("state was not found: #%v", 0)
- }
-
- testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount)
- testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount)
- })
- }
-}
-
-func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) {
- nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
- for eSym, eAct := range expectedState.acts {
- nonEmptyEntries[eSym.Num()] = struct{}{}
-
- ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num())
- if ty != eAct.ty {
- t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty)
- }
- switch eAct.ty {
- case ActionTypeShift:
- eNextState, err := newKernel(eAct.nextState)
- if err != nil {
- t.Fatal(err)
- }
- nextState := findStateByNum(automaton.states, stateNum)
- if nextState == nil {
- t.Fatalf("state was not found; state: #%v", stateNum)
- }
- if nextState.id != eNextState.id {
- t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
- }
- case ActionTypeReduce:
- prod := findProductionByNum(gram.productionSet, prodNum)
- if prod == nil {
- t.Fatalf("production was not found: #%v", prodNum)
- }
- if prod.id != eAct.production.id {
- t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id)
- }
- }
- }
- for symNum := 0; symNum < termCount; symNum++ {
- if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
- continue
- }
- ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum))
- if ty != ActionTypeError {
- t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum)
- }
- }
-}
-
-func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) {
- nonEmptyEntries := map[symbol.SymbolNum]struct{}{}
- for eSym, eGoTo := range expectedState.goTos {
- nonEmptyEntries[eSym.Num()] = struct{}{}
-
- eNextState, err := newKernel(eGoTo)
- if err != nil {
- t.Fatal(err)
- }
- ty, stateNum := ptab.getGoTo(state.num, eSym.Num())
- if ty != GoToTypeRegistered {
- t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym)
- }
- nextState := findStateByNum(automaton.states, stateNum)
- if nextState == nil {
- t.Fatalf("state was not found: #%v", stateNum)
- }
- if nextState.id != eNextState.id {
- t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id)
- }
- }
- for symNum := 0; symNum < nonTermCount; symNum++ {
- if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked {
- continue
- }
- ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum))
- if ty != GoToTypeError {
- t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum)
- }
- }
-}
-
-type testActionEntry struct {
- ty ActionType
- nextState []*lrItem
- production *production
-}
-
-func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState {
- for _, state := range states {
- if state.num == num {
- return state
- }
- }
- return nil
-}
-
-func findProductionByNum(prods *productionSet, num productionNum) *production {
- for _, prod := range prods.getAllProductions() {
- if prod.num == num {
- return prod
- }
- }
- return nil
-}
diff --git a/grammar/production.go b/grammar/production.go
deleted file mode 100644
index 1742dc6..0000000
--- a/grammar/production.go
+++ /dev/null
@@ -1,117 +0,0 @@
-package grammar
-
-import (
- "crypto/sha256"
- "encoding/hex"
- "fmt"
-
- "grammar/symbol"
-)
-
-type productionID [32]byte
-
-func (id productionID) String() string {
- return hex.EncodeToString(id[:])
-}
-
-func genProductionID(lhs symbol.Symbol, rhs []symbol.Symbol) productionID {
- seq := lhs.Byte()
- for _, sym := range rhs {
- seq = append(seq, sym.Byte()...)
- }
- return productionID(sha256.Sum256(seq))
-}
-
-type productionNum uint16
-
-const (
- productionNumNil = productionNum(0)
- productionNumStart = productionNum(1)
- productionNumMin = productionNum(2)
-)
-
-func (n productionNum) Int() int {
- return int(n)
-}
-
-type production struct {
- id productionID
- num productionNum
- lhs symbol.Symbol
- rhs []symbol.Symbol
- rhsLen int
-}
-
-func newProduction(lhs symbol.Symbol, rhs []symbol.Symbol) (*production, error) {
- if lhs.IsNil() {
- return nil, fmt.Errorf("LHS must be a non-nil symbol; LHS: %v, RHS: %v", lhs, rhs)
- }
- for _, sym := range rhs {
- if sym.IsNil() {
- return nil, fmt.Errorf("a symbol of RHS must be a non-nil symbol; LHS: %v, RHS: %v", lhs, rhs)
- }
- }
-
- return &production{
- id: genProductionID(lhs, rhs),
- lhs: lhs,
- rhs: rhs,
- rhsLen: len(rhs),
- }, nil
-}
-
-func (p *production) isEmpty() bool {
- return p.rhsLen == 0
-}
-
-type productionSet struct {
- lhs2Prods map[symbol.Symbol][]*production
- id2Prod map[productionID]*production
- num productionNum
-}
-
-func newProductionSet() *productionSet {
- return &productionSet{
- lhs2Prods: map[symbol.Symbol][]*production{},
- id2Prod: map[productionID]*production{},
- num: productionNumMin,
- }
-}
-
-func (ps *productionSet) append(prod *production) {
- if _, ok := ps.id2Prod[prod.id]; ok {
- return
- }
-
- if prod.lhs.IsStart() {
- prod.num = productionNumStart
- } else {
- prod.num = ps.num
- ps.num++
- }
-
- if prods, ok := ps.lhs2Prods[prod.lhs]; ok {
- ps.lhs2Prods[prod.lhs] = append(prods, prod)
- } else {
- ps.lhs2Prods[prod.lhs] = []*production{prod}
- }
- ps.id2Prod[prod.id] = prod
-}
-
-func (ps *productionSet) findByID(id productionID) (*production, bool) {
- prod, ok := ps.id2Prod[id]
- return prod, ok
-}
-
-func (ps *productionSet) findByLHS(lhs symbol.Symbol) ([]*production, bool) {
- if lhs.IsNil() {
- return nil, false
- }
-
- prods, ok := ps.lhs2Prods[lhs]
- return prods, ok
-}
-
-func (ps *productionSet) getAllProductions() map[productionID]*production {
- return ps.id2Prod
-}
diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go
deleted file mode 100644
index 88a6b17..0000000
--- a/grammar/semantic_error.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package grammar
-
-import "errors"
-
-var (
- semErrNoGrammarName = errors.New("name is missing")
- semErrSpellingInconsistency = errors.New("the identifiers are treated as the same. please use the same spelling")
- semErrDuplicateAssoc = errors.New("associativity and precedence cannot be specified multiple times for a symbol")
- semErrUndefinedPrec = errors.New("symbol must has precedence")
- semErrUndefinedOrdSym = errors.New("undefined ordered symbol")
- semErrUnusedProduction = errors.New("unused production")
- semErrUnusedTerminal = errors.New("unused terminal")
- semErrTermCannotBeSkipped = errors.New("a terminal used in productions cannot be skipped")
- semErrNoProduction = errors.New("a grammar needs at least one production")
- semErrUndefinedSym = errors.New("undefined symbol")
- semErrDuplicateProduction = errors.New("duplicate production")
- semErrDuplicateTerminal = errors.New("duplicate terminal")
- semErrDuplicateFragment = errors.New("duplicate fragment")
- semErrDuplicateName = errors.New("duplicate names are not allowed between terminals and non-terminals")
- semErrErrSymIsReserved = errors.New("symbol 'error' is reserved as a terminal symbol")
- semErrDuplicateLabel = errors.New("a label must be unique in an alternative")
- semErrInvalidLabel = errors.New("a label must differ from terminal symbols or non-terminal symbols")
- semErrDirInvalidName = errors.New("invalid directive name")
- semErrDirInvalidParam = errors.New("invalid parameter")
- semErrDuplicateDir = errors.New("a directive must not be duplicated")
- semErrDuplicateElem = errors.New("duplicate element")
- semErrAmbiguousElem = errors.New("ambiguous element")
- semErrInvalidProdDir = errors.New("invalid production directive")
- semErrInvalidAltDir = errors.New("invalid alternative directive")
-)
diff --git a/grammar/symbol/symbol.go b/grammar/symbol/symbol.go
deleted file mode 100644
index f9e6a93..0000000
--- a/grammar/symbol/symbol.go
+++ /dev/null
@@ -1,295 +0,0 @@
-package symbol
-
-import (
- "fmt"
- "sort"
-)
-
-type symbolKind string
-
-const (
- symbolKindNonTerminal = symbolKind("non-terminal")
- symbolKindTerminal = symbolKind("terminal")
-)
-
-func (t symbolKind) String() string {
- return string(t)
-}
-
-type SymbolNum uint16
-
-func (n SymbolNum) Int() int {
- return int(n)
-}
-
-type Symbol uint16
-
-func (s Symbol) String() string {
- kind, isStart, isEOF, num := s.describe()
- var prefix string
- switch {
- case isStart:
- prefix = "s"
- case isEOF:
- prefix = "e"
- case kind == symbolKindNonTerminal:
- prefix = "n"
- case kind == symbolKindTerminal:
- prefix = "t"
- default:
- prefix = "?"
- }
- return fmt.Sprintf("%v%v", prefix, num)
-}
-
-const (
- maskKindPart = uint16(0x8000) // 1000 0000 0000 0000
- maskNonTerminal = uint16(0x0000) // 0000 0000 0000 0000
- maskTerminal = uint16(0x8000) // 1000 0000 0000 0000
-
- maskSubKindpart = uint16(0x4000) // 0100 0000 0000 0000
- maskNonStartAndEOF = uint16(0x0000) // 0000 0000 0000 0000
- maskStartOrEOF = uint16(0x4000) // 0100 0000 0000 0000
-
- maskNumberPart = uint16(0x3fff) // 0011 1111 1111 1111
-
- symbolNumStart = uint16(0x0001) // 0000 0000 0000 0001
- symbolNumEOF = uint16(0x0001) // 0000 0000 0000 0001
-
- SymbolNil = Symbol(0) // 0000 0000 0000 0000
- symbolStart = Symbol(maskNonTerminal | maskStartOrEOF | symbolNumStart) // 0100 0000 0000 0001
- SymbolEOF = Symbol(maskTerminal | maskStartOrEOF | symbolNumEOF) // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol.
-
- // The symbol name contains `<` and `>` to avoid conflicting with user-defined symbols.
- symbolNameEOF = "<eof>"
-
- nonTerminalNumMin = SymbolNum(2) // The number 1 is used by a start symbol.
- terminalNumMin = SymbolNum(2) // The number 1 is used by the EOF symbol.
- symbolNumMax = SymbolNum(0xffff) >> 2 // 0011 1111 1111 1111
-)
-
-func newSymbol(kind symbolKind, isStart bool, num SymbolNum) (Symbol, error) {
- if num > symbolNumMax {
- return SymbolNil, fmt.Errorf("a symbol number exceeds the limit; limit: %v, passed: %v", symbolNumMax, num)
- }
- if kind == symbolKindTerminal && isStart {
- return SymbolNil, fmt.Errorf("a start symbol must be a non-terminal symbol")
- }
-
- kindMask := maskNonTerminal
- if kind == symbolKindTerminal {
- kindMask = maskTerminal
- }
- startMask := maskNonStartAndEOF
- if isStart {
- startMask = maskStartOrEOF
- }
- return Symbol(kindMask | startMask | uint16(num)), nil
-}
-
-func (s Symbol) Num() SymbolNum {
- _, _, _, num := s.describe()
- return num
-}
-
-func (s Symbol) Byte() []byte {
- if s.IsNil() {
- return []byte{0, 0}
- }
- return []byte{byte(uint16(s) >> 8), byte(uint16(s) & 0x00ff)}
-}
-
-func (s Symbol) IsNil() bool {
- _, _, _, num := s.describe()
- return num == 0
-}
-
-func (s Symbol) IsStart() bool {
- if s.IsNil() {
- return false
- }
- _, isStart, _, _ := s.describe()
- return isStart
-}
-
-func (s Symbol) isEOF() bool {
- if s.IsNil() {
- return false
- }
- _, _, isEOF, _ := s.describe()
- return isEOF
-}
-
-func (s Symbol) isNonTerminal() bool {
- if s.IsNil() {
- return false
- }
- kind, _, _, _ := s.describe()
- return kind == symbolKindNonTerminal
-}
-
-func (s Symbol) IsTerminal() bool {
- if s.IsNil() {
- return false
- }
- return !s.isNonTerminal()
-}
-
-func (s Symbol) describe() (symbolKind, bool, bool, SymbolNum) {
- kind := symbolKindNonTerminal
- if uint16(s)&maskKindPart > 0 {
- kind = symbolKindTerminal
- }
- isStart := false
- isEOF := false
- if uint16(s)&maskSubKindpart > 0 {
- if kind == symbolKindNonTerminal {
- isStart = true
- } else {
- isEOF = true
- }
- }
- num := SymbolNum(uint16(s) & maskNumberPart)
- return kind, isStart, isEOF, num
-}
-
-type SymbolTable struct {
- text2Sym map[string]Symbol
- sym2Text map[Symbol]string
- nonTermTexts []string
- termTexts []string
- nonTermNum SymbolNum
- termNum SymbolNum
-}
-
-type SymbolTableWriter struct {
- *SymbolTable
-}
-
-type SymbolTableReader struct {
- *SymbolTable
-}
-
-func NewSymbolTable() *SymbolTable {
- return &SymbolTable{
- text2Sym: map[string]Symbol{
- symbolNameEOF: SymbolEOF,
- },
- sym2Text: map[Symbol]string{
- SymbolEOF: symbolNameEOF,
- },
- termTexts: []string{
- "", // Nil
- symbolNameEOF, // EOF
- },
- nonTermTexts: []string{
- "", // Nil
- "", // Start Symbol
- },
- nonTermNum: nonTerminalNumMin,
- termNum: terminalNumMin,
- }
-}
-
-func (t *SymbolTable) Writer() *SymbolTableWriter {
- return &SymbolTableWriter{
- SymbolTable: t,
- }
-}
-
-func (t *SymbolTable) Reader() *SymbolTableReader {
- return &SymbolTableReader{
- SymbolTable: t,
- }
-}
-
-func (w *SymbolTableWriter) RegisterStartSymbol(text string) (Symbol, error) {
- w.text2Sym[text] = symbolStart
- w.sym2Text[symbolStart] = text
- w.nonTermTexts[symbolStart.Num().Int()] = text
- return symbolStart, nil
-}
-
-func (w *SymbolTableWriter) RegisterNonTerminalSymbol(text string) (Symbol, error) {
- if sym, ok := w.text2Sym[text]; ok {
- return sym, nil
- }
- sym, err := newSymbol(symbolKindNonTerminal, false, w.nonTermNum)
- if err != nil {
- return SymbolNil, err
- }
- w.nonTermNum++
- w.text2Sym[text] = sym
- w.sym2Text[sym] = text
- w.nonTermTexts = append(w.nonTermTexts, text)
- return sym, nil
-}
-
-func (w *SymbolTableWriter) RegisterTerminalSymbol(text string) (Symbol, error) {
- if sym, ok := w.text2Sym[text]; ok {
- return sym, nil
- }
- sym, err := newSymbol(symbolKindTerminal, false, w.termNum)
- if err != nil {
- return SymbolNil, err
- }
- w.termNum++
- w.text2Sym[text] = sym
- w.sym2Text[sym] = text
- w.termTexts = append(w.termTexts, text)
- return sym, nil
-}
-
-func (r *SymbolTableReader) ToSymbol(text string) (Symbol, bool) {
- if sym, ok := r.text2Sym[text]; ok {
- return sym, true
- }
- return SymbolNil, false
-}
-
-func (r *SymbolTableReader) ToText(sym Symbol) (string, bool) {
- text, ok := r.sym2Text[sym]
- return text, ok
-}
-
-func (r *SymbolTableReader) TerminalSymbols() []Symbol {
- syms := make([]Symbol, 0, r.termNum.Int()-terminalNumMin.Int())
- for sym := range r.sym2Text {
- if !sym.IsTerminal() || sym.IsNil() {
- continue
- }
- syms = append(syms, sym)
- }
- sort.Slice(syms, func(i, j int) bool {
- return syms[i] < syms[j]
- })
- return syms
-}
-
-func (r *SymbolTableReader) TerminalTexts() ([]string, error) {
- if r.termNum == terminalNumMin {
- return nil, fmt.Errorf("symbol table has no terminals")
- }
- return r.termTexts, nil
-}
-
-func (r *SymbolTableReader) NonTerminalSymbols() []Symbol {
- syms := make([]Symbol, 0, r.nonTermNum.Int()-nonTerminalNumMin.Int())
- for sym := range r.sym2Text {
- if !sym.isNonTerminal() || sym.IsNil() {
- continue
- }
- syms = append(syms, sym)
- }
- sort.Slice(syms, func(i, j int) bool {
- return syms[i] < syms[j]
- })
- return syms
-}
-
-func (r *SymbolTableReader) NonTerminalTexts() ([]string, error) {
- if r.nonTermNum == nonTerminalNumMin || r.nonTermTexts[symbolStart.Num().Int()] == "" {
- return nil, fmt.Errorf("symbol table has no terminals or no start symbol")
- }
- return r.nonTermTexts, nil
-}
diff --git a/grammar/symbol/symbol_test.go b/grammar/symbol/symbol_test.go
deleted file mode 100644
index 31c3edd..0000000
--- a/grammar/symbol/symbol_test.go
+++ /dev/null
@@ -1,159 +0,0 @@
-package symbol
-
-import "testing"
-
-func TestSymbol(t *testing.T) {
- tab := NewSymbolTable()
- w := tab.Writer()
- _, _ = w.RegisterStartSymbol("expr'")
- _, _ = w.RegisterNonTerminalSymbol("expr")
- _, _ = w.RegisterNonTerminalSymbol("term")
- _, _ = w.RegisterNonTerminalSymbol("factor")
- _, _ = w.RegisterTerminalSymbol("id")
- _, _ = w.RegisterTerminalSymbol("add")
- _, _ = w.RegisterTerminalSymbol("mul")
- _, _ = w.RegisterTerminalSymbol("l_paren")
- _, _ = w.RegisterTerminalSymbol("r_paren")
-
- nonTermTexts := []string{
- "", // Nil
- "expr'",
- "expr",
- "term",
- "factor",
- }
-
- termTexts := []string{
- "", // Nil
- symbolNameEOF, // EOF
- "id",
- "add",
- "mul",
- "l_paren",
- "r_paren",
- }
-
- tests := []struct {
- text string
- isNil bool
- isStart bool
- isEOF bool
- isNonTerminal bool
- isTerminal bool
- }{
- {
- text: "expr'",
- isStart: true,
- isNonTerminal: true,
- },
- {
- text: "expr",
- isNonTerminal: true,
- },
- {
- text: "term",
- isNonTerminal: true,
- },
- {
- text: "factor",
- isNonTerminal: true,
- },
- {
- text: "id",
- isTerminal: true,
- },
- {
- text: "add",
- isTerminal: true,
- },
- {
- text: "mul",
- isTerminal: true,
- },
- {
- text: "l_paren",
- isTerminal: true,
- },
- {
- text: "r_paren",
- isTerminal: true,
- },
- }
- for _, tt := range tests {
- t.Run(tt.text, func(t *testing.T) {
- r := tab.Reader()
- sym, ok := r.ToSymbol(tt.text)
- if !ok {
- t.Fatalf("symbol was not found")
- }
- testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal)
- text, ok := r.ToText(sym)
- if !ok {
- t.Fatalf("text was not found")
- }
- if text != tt.text {
- t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text)
- }
- })
- }
-
- t.Run("EOF", func(t *testing.T) {
- testSymbolProperty(t, SymbolEOF, false, false, true, false, true)
- })
-
- t.Run("Nil", func(t *testing.T) {
- testSymbolProperty(t, SymbolNil, true, false, false, false, false)
- })
-
- t.Run("texts of non-terminals", func(t *testing.T) {
- r := tab.Reader()
- ts, err := r.NonTerminalTexts()
- if err != nil {
- t.Fatal(err)
- }
- if len(ts) != len(nonTermTexts) {
- t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts)
- }
- for i, text := range ts {
- if text != nonTermTexts[i] {
- t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text)
- }
- }
- })
-
- t.Run("texts of terminals", func(t *testing.T) {
- r := tab.Reader()
- ts, err := r.TerminalTexts()
- if err != nil {
- t.Fatal(err)
- }
- if len(ts) != len(termTexts) {
- t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts)
- }
- for i, text := range ts {
- if text != termTexts[i] {
- t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text)
- }
- }
- })
-}
-
-func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) {
- t.Helper()
-
- if v := sym.IsNil(); v != isNil {
- t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v)
- }
- if v := sym.IsStart(); v != isStart {
- t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v)
- }
- if v := sym.isEOF(); v != isEOF {
- t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v)
- }
- if v := sym.isNonTerminal(); v != isNonTerminal {
- t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v)
- }
- if v := sym.IsTerminal(); v != isTerminal {
- t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v)
- }
-}
diff --git a/grammar/test_helper_test.go b/grammar/test_helper_test.go
deleted file mode 100644
index 63fcafb..0000000
--- a/grammar/test_helper_test.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package grammar
-
-import (
- "testing"
-
- "grammar/symbol"
-)
-
-type testSymbolGenerator func(text string) symbol.Symbol
-
-func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
- return func(text string) symbol.Symbol {
- t.Helper()
-
- sym, ok := symTab.ToSymbol(text)
- if !ok {
- t.Fatalf("symbol was not found: %v", text)
- }
- return sym
- }
-}
-
-type testProductionGenerator func(lhs string, rhs ...string) *production
-
-func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
- return func(lhs string, rhs ...string) *production {
- t.Helper()
-
- rhsSym := []symbol.Symbol{}
- for _, text := range rhs {
- rhsSym = append(rhsSym, genSym(text))
- }
- prod, err := newProduction(genSym(lhs), rhsSym)
- if err != nil {
- t.Fatalf("failed to create a production: %v", err)
- }
-
- return prod
- }
-}
-
-type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem
-
-func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator {
- return func(lhs string, dot int, rhs ...string) *lrItem {
- t.Helper()
-
- prod := genProd(lhs, rhs...)
- item, err := newLR0Item(prod, dot)
- if err != nil {
- t.Fatalf("failed to create a LR0 item: %v", err)
- }
-
- return item
- }
-}
-
-func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem {
- if item.lookAhead.symbols == nil {
- item.lookAhead.symbols = map[symbol.Symbol]struct{}{}
- }
-
- for _, a := range lookAhead {
- item.lookAhead.symbols[a] = struct{}{}
- }
-
- return item
-}