diff options
Diffstat (limited to 'grammar')
32 files changed, 0 insertions, 14009 deletions
diff --git a/grammar/first.go b/grammar/first.go deleted file mode 100644 index 4f0bdcf..0000000 --- a/grammar/first.go +++ /dev/null @@ -1,148 +0,0 @@ -package grammar - -import ( - "fmt" - - "grammar/symbol" -) - -type firstEntry struct { - symbols map[symbol.Symbol]struct{} - empty bool -} - -func newFirstEntry() *firstEntry { - return &firstEntry{ - symbols: map[symbol.Symbol]struct{}{}, - empty: false, - } -} - -func (e *firstEntry) add(sym symbol.Symbol) bool { - if _, ok := e.symbols[sym]; ok { - return false - } - e.symbols[sym] = struct{}{} - return true -} - -func (e *firstEntry) addEmpty() bool { - if !e.empty { - e.empty = true - return true - } - return false -} - -func (e *firstEntry) mergeExceptEmpty(target *firstEntry) bool { - if target == nil { - return false - } - changed := false - for sym := range target.symbols { - added := e.add(sym) - if added { - changed = true - } - } - return changed -} - -type firstSet struct { - set map[symbol.Symbol]*firstEntry -} - -func newFirstSet(prods *productionSet) *firstSet { - fst := &firstSet{ - set: map[symbol.Symbol]*firstEntry{}, - } - for _, prod := range prods.getAllProductions() { - if _, ok := fst.set[prod.lhs]; ok { - continue - } - fst.set[prod.lhs] = newFirstEntry() - } - - return fst -} - -func (fst *firstSet) find(prod *production, head int) (*firstEntry, error) { - entry := newFirstEntry() - if prod.rhsLen <= head { - entry.addEmpty() - return entry, nil - } - for _, sym := range prod.rhs[head:] { - if sym.IsTerminal() { - entry.add(sym) - return entry, nil - } - - e := fst.findBySymbol(sym) - if e == nil { - return nil, fmt.Errorf("an entry of FIRST was not found; symbol: %s", sym) - } - for s := range e.symbols { - entry.add(s) - } - if !e.empty { - return entry, nil - } - } - entry.addEmpty() - return entry, nil -} - -func (fst *firstSet) findBySymbol(sym symbol.Symbol) *firstEntry { - return fst.set[sym] -} - -type firstComContext struct { - first *firstSet -} - -func newFirstComContext(prods 
*productionSet) *firstComContext { - return &firstComContext{ - first: newFirstSet(prods), - } -} - -func genFirstSet(prods *productionSet) (*firstSet, error) { - cc := newFirstComContext(prods) - for { - more := false - for _, prod := range prods.getAllProductions() { - e := cc.first.findBySymbol(prod.lhs) - changed, err := genProdFirstEntry(cc, e, prod) - if err != nil { - return nil, err - } - if changed { - more = true - } - } - if !more { - break - } - } - return cc.first, nil -} - -func genProdFirstEntry(cc *firstComContext, acc *firstEntry, prod *production) (bool, error) { - if prod.isEmpty() { - return acc.addEmpty(), nil - } - - for _, sym := range prod.rhs { - if sym.IsTerminal() { - return acc.add(sym), nil - } - - e := cc.first.findBySymbol(sym) - changed := acc.mergeExceptEmpty(e) - if !e.empty { - return changed, nil - } - } - return acc.addEmpty(), nil -} diff --git a/grammar/first_test.go b/grammar/first_test.go deleted file mode 100644 index ea1a61c..0000000 --- a/grammar/first_test.go +++ /dev/null @@ -1,219 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - "grammar/symbol" - "spec/grammar/parser" -) - -type first struct { - lhs string - num int - dot int - symbols []string - empty bool -} - -func TestGenFirst(t *testing.T) { - tests := []struct { - caption string - src string - first []first - }{ - { - caption: "productions contain only non-empty productions", - src: ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -`, - first: []first{ - {lhs: "expr'", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 0, dot: 1, symbols: []string{"add"}}, - {lhs: "expr", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "expr", num: 1, dot: 0, symbols: 
[]string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 0, dot: 1, symbols: []string{"mul"}}, - {lhs: "term", num: 0, dot: 2, symbols: []string{"l_paren", "id"}}, - {lhs: "term", num: 1, dot: 0, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 0, symbols: []string{"l_paren"}}, - {lhs: "factor", num: 0, dot: 1, symbols: []string{"l_paren", "id"}}, - {lhs: "factor", num: 0, dot: 2, symbols: []string{"r_paren"}}, - {lhs: "factor", num: 1, dot: 0, symbols: []string{"id"}}, - }, - }, - { - caption: "productions contain the empty start production", - src: ` -#name test; - -s - : - ; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "productions contain an empty production", - src: ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: false}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a start production contains a non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - | - ; -foo: "foo"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"foo"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"foo"}}, - {lhs: "s", num: 1, dot: 0, symbols: []string{}, empty: true}, - }, - }, - { - caption: "a production contains non-empty alternative and empty alternative", - src: ` -#name test; - -s - : foo - ; -foo - : bar - | - ; -bar: "bar"; -`, - first: []first{ - {lhs: "s'", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "s", num: 0, dot: 0, symbols: []string{"bar"}, empty: true}, - {lhs: "foo", num: 0, dot: 0, symbols: []string{"bar"}}, - {lhs: "foo", num: 1, dot: 0, symbols: []string{}, empty: 
true}, - }, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - fst, gram := genActualFirst(t, tt.src) - - for _, ttFirst := range tt.first { - lhsSym, ok := gram.symbolTable.ToSymbol(ttFirst.lhs) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", ttFirst.lhs) - } - - prod, ok := gram.productionSet.findByLHS(lhsSym) - if !ok { - t.Fatalf("a production was not found; LHS: %v (%v)", ttFirst.lhs, lhsSym) - } - - actualFirst, err := fst.find(prod[ttFirst.num], ttFirst.dot) - if err != nil { - t.Fatalf("failed to get a FIRST set; LHS: %v (%v), num: %v, dot: %v, error: %v", ttFirst.lhs, lhsSym, ttFirst.num, ttFirst.dot, err) - } - - expectedFirst := genExpectedFirstEntry(t, ttFirst.symbols, ttFirst.empty, gram.symbolTable) - - testFirst(t, actualFirst, expectedFirst) - } - }) - } -} - -func genActualFirst(t *testing.T, src string) (*firstSet, *Grammar) { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err := b.build() - if err != nil { - t.Fatal(err) - } - fst, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - if fst == nil { - t.Fatal("genFiest returned nil without any error") - } - - return fst, gram -} - -func genExpectedFirstEntry(t *testing.T, symbols []string, empty bool, symTab *symbol.SymbolTableReader) *firstEntry { - t.Helper() - - entry := newFirstEntry() - if empty { - entry.addEmpty() - } - for _, sym := range symbols { - symSym, ok := symTab.ToSymbol(sym) - if !ok { - t.Fatalf("a symbol was not found; symbol: %v", sym) - } - entry.add(symSym) - } - - return entry -} - -func testFirst(t *testing.T, actual, expected *firstEntry) { - if actual.empty != expected.empty { - t.Errorf("empty is mismatched\nwant: %v\ngot: %v", expected.empty, actual.empty) - } - - if len(actual.symbols) != len(expected.symbols) { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - - for eSym 
:= range expected.symbols { - if _, ok := actual.symbols[eSym]; !ok { - t.Fatalf("invalid FIRST set\nwant: %+v\ngot: %+v", expected.symbols, actual.symbols) - } - } -} diff --git a/grammar/grammar.go b/grammar/grammar.go deleted file mode 100644 index 9fb2968..0000000 --- a/grammar/grammar.go +++ /dev/null @@ -1,1390 +0,0 @@ -package grammar - -import ( - "fmt" - "io" - "strings" - - verr "error" - "grammar/lexical" - "grammar/symbol" - spec "spec/grammar" - "spec/grammar/parser" -) - -type astActionEntry struct { - position int - expansion bool -} - -type assocType string - -const ( - assocTypeNil = assocType("") - assocTypeLeft = assocType("left") - assocTypeRight = assocType("right") -) - -const ( - precNil = 0 - precMin = 1 -) - -// precAndAssoc represents precedence and associativities of terminal symbols and productions. -// We use the priority of the production to resolve shift/reduce conflicts. -type precAndAssoc struct { - // termPrec and termAssoc represent the precedence of the terminal symbols. - termPrec map[symbol.SymbolNum]int - termAssoc map[symbol.SymbolNum]assocType - - // prodPrec and prodAssoc represent the precedence and the associativities of the production. - // These values are inherited from the right-most terminal symbols in the RHS of the productions. 
- prodPrec map[productionNum]int - prodAssoc map[productionNum]assocType -} - -func (pa *precAndAssoc) terminalPrecedence(sym symbol.SymbolNum) int { - prec, ok := pa.termPrec[sym] - if !ok { - return precNil - } - - return prec -} - -func (pa *precAndAssoc) terminalAssociativity(sym symbol.SymbolNum) assocType { - assoc, ok := pa.termAssoc[sym] - if !ok { - return assocTypeNil - } - - return assoc -} - -func (pa *precAndAssoc) productionPredence(prod productionNum) int { - prec, ok := pa.prodPrec[prod] - if !ok { - return precNil - } - - return prec -} - -func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType { - assoc, ok := pa.prodAssoc[prod] - if !ok { - return assocTypeNil - } - - return assoc -} - -const reservedSymbolNameError = "error" - -type Grammar struct { - name string - lexSpec *lexical.LexSpec - skipSymbols []symbol.Symbol - productionSet *productionSet - augmentedStartSymbol symbol.Symbol - errorSymbol symbol.Symbol - symbolTable *symbol.SymbolTableReader - astActions map[productionID][]*astActionEntry - precAndAssoc *precAndAssoc - - // recoverProductions is a set of productions having the recover directive. - recoverProductions map[productionID]struct{} -} - -type buildConfig struct { - isReportingEnabled bool -} - -type BuildOption func(config *buildConfig) - -func EnableReporting() BuildOption { - return func(config *buildConfig) { - config.isReportingEnabled = true - } -} - -type GrammarBuilder struct { - AST *parser.RootNode - - errs verr.SpecErrors -} - -func (b *GrammarBuilder) Build(opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) { - gram, err := b.build() - if err != nil { - return nil, nil, err - } - - return compile(gram, opts...) 
-} - -func (b *GrammarBuilder) build() (*Grammar, error) { - var specName string - { - errOccurred := false - for _, dir := range b.AST.Directives { - if dir.Name != "name" { - continue - } - - if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'name' takes just one ID parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - - errOccurred = true - break - } - - specName = dir.Parameters[0].ID - break - } - - if specName == "" && !errOccurred { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrNoGrammarName, - }) - } - } - - b.checkSpellingInconsistenciesOfUserDefinedIDs(b.AST) - if len(b.errs) > 0 { - return nil, b.errs - } - - symTab, ss, err := b.genSymbolTable(b.AST) - if err != nil { - return nil, err - } - - lexSpec, skip, err := b.genLexSpecAndSkipSymbols(symTab.Reader(), b.AST) - if err != nil { - return nil, err - } - - prodsAndActs, err := b.genProductionsAndActions(b.AST, symTab.Reader(), ss.errSym, ss.augStartSym, ss.startSym) - if err != nil { - return nil, err - } - if prodsAndActs == nil && len(b.errs) > 0 { - return nil, b.errs - } - - pa, err := b.genPrecAndAssoc(symTab.Reader(), ss.errSym, prodsAndActs) - if err != nil { - return nil, err - } - if pa == nil && len(b.errs) > 0 { - return nil, b.errs - } - - syms := findUsedAndUnusedSymbols(b.AST) - if syms == nil && len(b.errs) > 0 { - return nil, b.errs - } - - // When a terminal symbol that cannot be reached from the start symbol has the skip directive, - // the compiler treats its terminal as a used symbol, not unused. 
- { - r := symTab.Reader() - for _, sym := range skip { - s, _ := r.ToText(sym) - if _, ok := syms.unusedTerminals[s]; !ok { - prod := syms.usedTerminals[s] - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrTermCannotBeSkipped, - Detail: s, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - continue - } - - delete(syms.unusedTerminals, s) - } - } - - for sym, prod := range syms.unusedProductions { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUnusedProduction, - Detail: sym, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - - for sym, prod := range syms.unusedTerminals { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUnusedTerminal, - Detail: sym, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - - if len(b.errs) > 0 { - return nil, b.errs - } - - return &Grammar{ - name: specName, - lexSpec: lexSpec, - skipSymbols: skip, - productionSet: prodsAndActs.prods, - augmentedStartSymbol: prodsAndActs.augStartSym, - errorSymbol: ss.errSym, - symbolTable: symTab.Reader(), - astActions: prodsAndActs.astActs, - recoverProductions: prodsAndActs.recoverProds, - precAndAssoc: pa, - }, nil -} - -type usedAndUnusedSymbols struct { - unusedProductions map[string]*parser.ProductionNode - unusedTerminals map[string]*parser.ProductionNode - usedTerminals map[string]*parser.ProductionNode -} - -func findUsedAndUnusedSymbols(root *parser.RootNode) *usedAndUnusedSymbols { - prods := map[string]*parser.ProductionNode{} - lexProds := map[string]*parser.ProductionNode{} - mark := map[string]bool{} - { - for _, p := range root.Productions { - prods[p.LHS] = p - mark[p.LHS] = false - for _, alt := range p.RHS { - for _, e := range alt.Elements { - if e.ID == "" { - continue - } - mark[e.ID] = false - } - } - } - - for _, p := range root.LexProductions { - lexProds[p.LHS] = p - mark[p.LHS] = false - } - - start := root.Productions[0] - mark[start.LHS] = true - markUsedSymbols(mark, map[string]bool{}, prods, start) - - // We don't have to check the error 
symbol because the error symbol doesn't have a production. - delete(mark, reservedSymbolNameError) - } - - usedTerms := make(map[string]*parser.ProductionNode, len(lexProds)) - unusedProds := map[string]*parser.ProductionNode{} - unusedTerms := map[string]*parser.ProductionNode{} - for sym, used := range mark { - if p, ok := prods[sym]; ok { - if used { - continue - } - unusedProds[sym] = p - continue - } - if p, ok := lexProds[sym]; ok { - if used { - usedTerms[sym] = p - } else { - unusedTerms[sym] = p - } - continue - } - - // May be reached here when a fragment name appears on the right-hand side of a production rule. However, an error - // to the effect that a production rule cannot contain a fragment will be detected in a subsequent process. So we can - // ignore it here. - } - - return &usedAndUnusedSymbols{ - usedTerminals: usedTerms, - unusedProductions: unusedProds, - unusedTerminals: unusedTerms, - } -} - -func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[string]*parser.ProductionNode, prod *parser.ProductionNode) { - if marked[prod.LHS] { - return - } - - for _, alt := range prod.RHS { - for _, e := range alt.Elements { - if e.ID == "" { - continue - } - - mark[e.ID] = true - - p, ok := prods[e.ID] - if !ok { - continue - } - - // Remove a production to avoid inifinite recursion. - marked[prod.LHS] = true - - markUsedSymbols(mark, marked, prods, p) - } - } -} - -func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *parser.RootNode) { - var ids []string - { - for _, prod := range root.Productions { - ids = append(ids, prod.LHS) - for _, alt := range prod.RHS { - for _, elem := range alt.Elements { - if elem.Label != nil { - ids = append(ids, elem.Label.Name) - } - } - } - } - for _, prod := range root.LexProductions { - ids = append(ids, prod.LHS) - } - for _, dir := range root.Directives { - dirIDs := collectUserDefinedIDsFromDirective(dir) - if len(dirIDs) > 0 { - ids = append(ids, dirIDs...) 
- } - } - } - - duplicated := lexical.FindSpellingInconsistencies(ids) - if len(duplicated) == 0 { - return - } - - for _, dup := range duplicated { - var s string - { - var b strings.Builder - fmt.Fprintf(&b, "%+v", dup[0]) - for _, id := range dup[1:] { - fmt.Fprintf(&b, ", %+v", id) - } - s = b.String() - } - - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrSpellingInconsistency, - Detail: s, - }) - } -} - -func collectUserDefinedIDsFromDirective(dir *parser.DirectiveNode) []string { - var ids []string - for _, param := range dir.Parameters { - if param.Group != nil { - for _, d := range param.Group { - dIDs := collectUserDefinedIDsFromDirective(d) - if len(dIDs) > 0 { - ids = append(ids, dIDs...) - } - } - } - if param.OrderedSymbol != "" { - ids = append(ids, param.OrderedSymbol) - } - } - return ids -} - -type symbols struct { - errSym symbol.Symbol - augStartSym symbol.Symbol - startSym symbol.Symbol -} - -func (b *GrammarBuilder) genSymbolTable(root *parser.RootNode) (*symbol.SymbolTable, *symbols, error) { - symTab := symbol.NewSymbolTable() - w := symTab.Writer() - r := symTab.Reader() - - // We need to register the reserved symbol before registering others. 
- var errSym symbol.Symbol - { - sym, err := w.RegisterTerminalSymbol(reservedSymbolNameError) - if err != nil { - return nil, nil, err - } - errSym = sym - } - - for _, prod := range root.LexProductions { - if sym, exist := r.ToSymbol(prod.LHS); exist { - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } else { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateTerminal, - Detail: prod.LHS, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - - continue - } - - _, err := w.RegisterTerminalSymbol(prod.LHS) - if err != nil { - return nil, nil, err - } - } - - startProd := root.Productions[0] - augStartText := fmt.Sprintf("%s'", startProd.LHS) - var err error - augStartSym, err := w.RegisterStartSymbol(augStartText) - if err != nil { - return nil, nil, err - } - if augStartSym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: startProd.Pos.Row, - Col: startProd.Pos.Col, - }) - } - - startSym, err := w.RegisterNonTerminalSymbol(startProd.LHS) - if err != nil { - return nil, nil, err - } - if startSym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: startProd.Pos.Row, - Col: startProd.Pos.Col, - }) - } - - for _, prod := range root.Productions { - sym, err := w.RegisterNonTerminalSymbol(prod.LHS) - if err != nil { - return nil, nil, err - } - if sym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateName, - Detail: prod.LHS, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - } - - return symTab, &symbols{ - errSym: errSym, - augStartSym: augStartSym, - startSym: startSym, - }, nil -} - -func (b *GrammarBuilder) genLexSpecAndSkipSymbols(symTab *symbol.SymbolTableReader, root *parser.RootNode) 
(*lexical.LexSpec, []symbol.Symbol, error) { - entries := []*lexical.LexEntry{} - skipSyms := []symbol.Symbol{} - for _, prod := range root.LexProductions { - entry, skip, specErr, err := genLexEntry(prod) - if err != nil { - return nil, nil, err - } - if specErr != nil { - b.errs = append(b.errs, specErr) - continue - } - if skip { - sym, _ := symTab.ToSymbol(prod.LHS) - skipSyms = append(skipSyms, sym) - } - entries = append(entries, entry) - } - - checkedFragments := map[string]struct{}{} - for _, fragment := range root.Fragments { - if _, exist := checkedFragments[fragment.LHS]; exist { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateFragment, - Detail: fragment.LHS, - Row: fragment.Pos.Row, - Col: fragment.Pos.Col, - }) - continue - } - checkedFragments[fragment.LHS] = struct{}{} - - entries = append(entries, &lexical.LexEntry{ - Fragment: true, - Kind: spec.LexKindName(fragment.LHS), - Pattern: fragment.RHS, - }) - } - - return &lexical.LexSpec{ - Entries: entries, - }, skipSyms, nil -} - -func genLexEntry(prod *parser.ProductionNode) (*lexical.LexEntry, bool, *verr.SpecError, error) { - alt := prod.RHS[0] - elem := alt.Elements[0] - - var pattern string - if elem.Literally { - pattern = spec.EscapePattern(elem.Pattern) - } else { - pattern = elem.Pattern - } - - var modes []spec.LexModeName - var skip bool - var push spec.LexModeName - var pop bool - dirConsumed := map[string]struct{}{} - for _, dir := range prod.Directives { - if _, consumed := dirConsumed[dir.Name]; consumed { - return nil, false, &verr.SpecError{ - Cause: semErrDuplicateDir, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - dirConsumed[dir.Name] = struct{}{} - - switch dir.Name { - case "mode": - if len(dir.Parameters) == 0 { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'mode' directive needs an ID parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - for _, param := range dir.Parameters { - if 
param.ID == "" { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'mode' directive needs an ID parameter", - Row: param.Pos.Row, - Col: param.Pos.Col, - }, nil - } - modes = append(modes, spec.LexModeName(param.ID)) - } - case "skip": - if len(dir.Parameters) > 0 { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'skip' directive needs no parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - skip = true - case "push": - if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'push' directive needs an ID parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - push = spec.LexModeName(dir.Parameters[0].ID) - case "pop": - if len(dir.Parameters) > 0 { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'pop' directive needs no parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - pop = true - default: - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - } - - if len(alt.Directives) > 0 { - return nil, false, &verr.SpecError{ - Cause: semErrInvalidAltDir, - Detail: "a lexical production cannot have alternative directives", - Row: alt.Directives[0].Pos.Row, - Col: alt.Directives[0].Pos.Col, - }, nil - } - - return &lexical.LexEntry{ - Modes: modes, - Kind: spec.LexKindName(prod.LHS), - Pattern: pattern, - Push: push, - Pop: pop, - }, skip, nil, nil -} - -type productionsAndActions struct { - prods *productionSet - augStartSym symbol.Symbol - astActs map[productionID][]*astActionEntry - prodPrecsTerm map[productionID]symbol.Symbol - prodPrecsOrdSym map[productionID]string - prodPrecPoss map[productionID]*parser.Position - recoverProds map[productionID]struct{} -} - -func (b *GrammarBuilder) genProductionsAndActions(root *parser.RootNode, symTab *symbol.SymbolTableReader, 
errSym symbol.Symbol, augStartSym symbol.Symbol, startSym symbol.Symbol) (*productionsAndActions, error) { - if len(root.Productions) == 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrNoProduction, - }) - return nil, nil - } - - prods := newProductionSet() - astActs := map[productionID][]*astActionEntry{} - prodPrecsTerm := map[productionID]symbol.Symbol{} - prodPrecsOrdSym := map[productionID]string{} - prodPrecPoss := map[productionID]*parser.Position{} - recoverProds := map[productionID]struct{}{} - - p, err := newProduction(augStartSym, []symbol.Symbol{ - startSym, - }) - if err != nil { - return nil, err - } - - prods.append(p) - - for _, prod := range root.Productions { - lhsSym, ok := symTab.ToSymbol(prod.LHS) - if !ok { - // All symbols are assumed to be pre-detected, so it's a bug if we cannot find them here. - return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS) - } - - if len(prod.Directives) > 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrInvalidProdDir, - Detail: "a production cannot have production directives", - Row: prod.Directives[0].Pos.Row, - Col: prod.Directives[0].Pos.Col, - }) - continue - } - - LOOP_RHS: - for _, alt := range prod.RHS { - altSyms := make([]symbol.Symbol, len(alt.Elements)) - offsets := map[string]int{} - ambiguousIDOffsets := map[string]struct{}{} - for i, elem := range alt.Elements { - sym, ok := symTab.ToSymbol(elem.ID) - if !ok { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUndefinedSym, - Detail: elem.ID, - Row: elem.Pos.Row, - Col: elem.Pos.Col, - }) - continue LOOP_RHS - } - altSyms[i] = sym - - if elem.Label != nil { - if _, added := offsets[elem.Label.Name]; added { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateLabel, - Detail: elem.Label.Name, - Row: elem.Label.Pos.Row, - Col: elem.Label.Pos.Col, - }) - continue LOOP_RHS - } - if _, found := symTab.ToSymbol(elem.Label.Name); found { - b.errs = append(b.errs, &verr.SpecError{ - Cause: 
semErrInvalidLabel, - Detail: elem.Label.Name, - Row: elem.Label.Pos.Row, - Col: elem.Label.Pos.Col, - }) - continue LOOP_RHS - } - offsets[elem.Label.Name] = i - } - // A symbol having a label can be specified by both the label and the symbol name. - // So record the symbol's position, whether or not it has a label. - if elem.ID != "" { - if _, exist := offsets[elem.ID]; exist { - // When the same symbol appears multiple times in an alternative, the symbol is ambiguous. When we need - // to specify the symbol in a directive, we cannot use the name of the ambiguous symbol. Instead, specify - // a label to resolve the ambiguity. - delete(offsets, elem.ID) - ambiguousIDOffsets[elem.ID] = struct{}{} - } else { - offsets[elem.ID] = i - } - } - } - - p, err := newProduction(lhsSym, altSyms) - if err != nil { - return nil, err - } - if _, exist := prods.findByID(p.id); exist { - // Report the line number of a duplicate alternative. - // When the alternative is empty, we report the position of its LHS. 
- var row int - var col int - if len(alt.Elements) > 0 { - row = alt.Elements[0].Pos.Row - col = alt.Elements[0].Pos.Col - } else { - row = prod.Pos.Row - col = prod.Pos.Col - } - - var detail string - { - var b strings.Builder - fmt.Fprintf(&b, "%v →", prod.LHS) - for _, elem := range alt.Elements { - switch { - case elem.ID != "": - fmt.Fprintf(&b, " %v", elem.ID) - case elem.Pattern != "": - fmt.Fprintf(&b, ` "%v"`, elem.Pattern) - } - } - if len(alt.Elements) == 0 { - fmt.Fprintf(&b, " ε") - } - - detail = b.String() - } - - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateProduction, - Detail: detail, - Row: row, - Col: col, - }) - continue LOOP_RHS - } - prods.append(p) - - dirConsumed := map[string]struct{}{} - for _, dir := range alt.Directives { - if _, consumed := dirConsumed[dir.Name]; consumed { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateDir, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - } - dirConsumed[dir.Name] = struct{}{} - - switch dir.Name { - case "ast": - if len(dir.Parameters) == 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'ast' directive needs at least one parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - astAct := make([]*astActionEntry, len(dir.Parameters)) - consumedOffsets := map[int]struct{}{} - for i, param := range dir.Parameters { - if param.ID == "" { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'ast' directive can take only ID parameters", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - - if _, ambiguous := ambiguousIDOffsets[param.ID]; ambiguous { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrAmbiguousElem, - Detail: fmt.Sprintf("'%v' is ambiguous", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - - offset, ok := offsets[param.ID] - if !ok { - b.errs = append(b.errs, &verr.SpecError{ 
- Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("a symbol was not found in an alternative: %v", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - if _, consumed := consumedOffsets[offset]; consumed { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateElem, - Detail: param.ID, - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - consumedOffsets[offset] = struct{}{} - - if param.Expansion { - elem := alt.Elements[offset] - if elem.Pattern != "" { - // Currently, it is a bug to reach here because it is - // forbidden to have anything other than ID appear in - // production rules. - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("the expansion symbol cannot be applied to a pattern (%v: \"%v\")", param.ID, elem.Pattern), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - elemSym, ok := symTab.ToSymbol(elem.ID) - if !ok { - // If the symbol was not found, it's a bug. 
- return nil, fmt.Errorf("a symbol corresponding to an ID (%v) was not found", elem.ID) - } - if elemSym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("the expansion symbol cannot be applied to a terminal symbol (%v: %v)", param.ID, elem.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - } - - astAct[i] = &astActionEntry{ - position: offset + 1, - expansion: param.Expansion, - } - } - astActs[p.id] = astAct - case "prec": - if len(dir.Parameters) != 1 || (dir.Parameters[0].ID == "" && dir.Parameters[0].OrderedSymbol == "") { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'prec' directive needs just one ID parameter or ordered symbol", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - param := dir.Parameters[0] - switch { - case param.ID != "": - sym, ok := symTab.ToSymbol(param.ID) - if !ok { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("unknown terminal symbol: %v", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - } - if !sym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("the symbol must be a terminal: %v", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - prodPrecsTerm[p.id] = sym - prodPrecPoss[p.id] = ¶m.Pos - case param.OrderedSymbol != "": - prodPrecsOrdSym[p.id] = param.OrderedSymbol - prodPrecPoss[p.id] = ¶m.Pos - } - case "recover": - if len(dir.Parameters) > 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'recover' directive needs no parameter", 
- Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - recoverProds[p.id] = struct{}{} - default: - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: fmt.Sprintf("invalid directive name '%v'", dir.Name), - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - } - } - } - - return &productionsAndActions{ - prods: prods, - augStartSym: augStartSym, - astActs: astActs, - prodPrecsTerm: prodPrecsTerm, - prodPrecsOrdSym: prodPrecsOrdSym, - prodPrecPoss: prodPrecPoss, - recoverProds: recoverProds, - }, nil -} -

// genPrecAndAssoc builds the precedence and associativity tables for terminal
// symbols and for productions.
//
// Terminal and ordered-symbol precedence comes from the top-level `#prec`
// directive group: every entry of the group (`#left`, `#right`, `#assign`)
// receives the next precedence number, starting at precMin. Production
// precedence comes from the per-alternative settings recorded in prodsAndActs,
// or, when an alternative has none, from its right-most terminal symbol.
//
// Specification problems are not returned through the error result. They are
// appended to b.errs and signalled by a (nil, nil) return, so callers have to
// inspect b.errs. Every return statement in this function yields a nil error.
func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbol.SymbolTableReader, errSym symbol.Symbol, prodsAndActs *productionsAndActions) (*precAndAssoc, error) {
	termPrec := map[symbol.SymbolNum]int{}
	termAssoc := map[symbol.SymbolNum]assocType{}
	ordSymPrec := map[string]int{}
	{
		// Locate the directive group of the top-level `#prec` directive. When
		// several valid `#prec` directives exist, the last one wins.
		var precGroup []*parser.DirectiveNode
		for _, dir := range b.AST.Directives {
			switch dir.Name {
			case "prec":
				// len() of a nil slice is 0, so this also rejects a missing
				// parameter list.
				if len(dir.Parameters) != 1 || dir.Parameters[0].Group == nil {
					b.errs = append(b.errs, &verr.SpecError{
						Cause:  semErrDirInvalidParam,
						Detail: "'prec' needs just one directive group",
						Row:    dir.Pos.Row,
						Col:    dir.Pos.Col,
					})
					continue
				}
				precGroup = dir.Parameters[0].Group
			case "name":
				// Nothing to do for `#name` in this function.
			default:
				b.errs = append(b.errs, &verr.SpecError{
					Cause:  semErrDirInvalidName,
					Detail: dir.Name,
					Row:    dir.Pos.Row,
					Col:    dir.Pos.Col,
				})
			}
		}

		precN := precMin
		for _, dir := range precGroup {
			var assocTy assocType
			switch dir.Name {
			case "left":
				assocTy = assocTypeLeft
			case "right":
				assocTy = assocTypeRight
			case "assign":
				// `#assign` gives a precedence only.
				assocTy = assocTypeNil
			default:
				b.errs = append(b.errs, &verr.SpecError{
					Cause:  semErrDirInvalidName,
					Detail: dir.Name,
					Row:    dir.Pos.Row,
					Col:    dir.Pos.Col,
				})
				return nil, nil
			}

			if len(dir.Parameters) == 0 {
				b.errs = append(b.errs, &verr.SpecError{
					Cause:  semErrDirInvalidParam,
					Detail: "associativity needs at least one symbol",
					Row:    dir.Pos.Row,
					Col:    dir.Pos.Col,
				})
				return nil, nil
			}
		ASSOC_PARAM_LOOP:
			for _, p := range dir.Parameters {
				switch {
				case p.ID != "":
					sym, ok := symTab.ToSymbol(p.ID)
					if !ok {
						b.errs = append(b.errs, &verr.SpecError{
							Cause:  semErrDirInvalidParam,
							Detail: fmt.Sprintf("'%v' is undefined", p.ID),
							Row:    p.Pos.Row,
							Col:    p.Pos.Col,
						})
						return nil, nil
					}
					if sym == errSym {
						b.errs = append(b.errs, &verr.SpecError{
							Cause:  semErrDirInvalidParam,
							Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name),
							Row:    p.Pos.Row,
							Col:    p.Pos.Col,
						})
						return nil, nil
					}
					if !sym.IsTerminal() {
						b.errs = append(b.errs, &verr.SpecError{
							Cause:  semErrDirInvalidParam,
							Detail: fmt.Sprintf("associativity can take only terminal symbol ('%v' is a non-terminal)", p.ID),
							Row:    p.Pos.Row,
							Col:    p.Pos.Col,
						})
						return nil, nil
					}
					if prec, alreadySet := termPrec[sym.Num()]; alreadySet {
						// The symbol was listed before. Pick the message that
						// describes how the earlier declaration differs.
						var detail string
						switch {
						case prec == precN:
							detail = fmt.Sprintf("'%v' already has the same associativity and precedence", p.ID)
						case termAssoc[sym.Num()] == assocTy:
							detail = fmt.Sprintf("'%v' already has different precedence", p.ID)
						default:
							detail = fmt.Sprintf("'%v' already has different associativity and precedence", p.ID)
						}
						b.errs = append(b.errs, &verr.SpecError{
							Cause:  semErrDuplicateAssoc,
							Detail: detail,
							Row:    p.Pos.Row,
							Col:    p.Pos.Col,
						})
						// Stop processing this directive's parameters; the
						// remaining directives of the group are still checked.
						break ASSOC_PARAM_LOOP
					}

					termPrec[sym.Num()] = precN
					termAssoc[sym.Num()] = assocTy
				case p.OrderedSymbol != "":
					if prec, alreadySet := ordSymPrec[p.OrderedSymbol]; alreadySet {
						var detail string
						if prec == precN {
							detail = fmt.Sprintf("'$%v' already has the same precedence", p.OrderedSymbol)
						} else {
							detail = fmt.Sprintf("'$%v' already has different precedence", p.OrderedSymbol)
						}
						b.errs = append(b.errs, &verr.SpecError{
							Cause:  semErrDuplicateAssoc,
							Detail: detail,
							Row:    p.Pos.Row,
							Col:    p.Pos.Col,
						})
						break ASSOC_PARAM_LOOP
					}

					ordSymPrec[p.OrderedSymbol] = precN
				default:
					b.errs = append(b.errs, &verr.SpecError{
						Cause:  semErrDirInvalidParam,
						Detail: "a parameter must be an ID or an ordered symbol",
						Row:    p.Pos.Row,
						Col:    p.Pos.Col,
					})
					return nil, nil
				}
			}

			precN++
		}
	}
	if len(b.errs) > 0 {
		return nil, nil
	}

	prodPrec := map[productionNum]int{}
	prodAssoc := map[productionNum]assocType{}
	for _, prod := range prodsAndActs.prods.getAllProductions() {
		// A #prec directive changes only precedence, not associativity.
		if term, ok := prodsAndActs.prodPrecsTerm[prod.id]; ok {
			if prec, ok := termPrec[term.Num()]; ok {
				prodPrec[prod.num] = prec
				prodAssoc[prod.num] = assocTypeNil
			} else {
				// The lookup result is only used as error detail, so a failed
				// lookup simply leaves the detail text at its zero value.
				text, _ := symTab.ToText(term)
				pos := prodsAndActs.prodPrecPoss[prod.id]
				b.errs = append(b.errs, &verr.SpecError{
					Cause:  semErrUndefinedPrec,
					Detail: text,
					Row:    pos.Row,
					Col:    pos.Col,
				})
			}
		} else if ordSym, ok := prodsAndActs.prodPrecsOrdSym[prod.id]; ok {
			if prec, ok := ordSymPrec[ordSym]; ok {
				prodPrec[prod.num] = prec
				prodAssoc[prod.num] = assocTypeNil
			} else {
				pos := prodsAndActs.prodPrecPoss[prod.id]
				b.errs = append(b.errs, &verr.SpecError{
					Cause:  semErrUndefinedOrdSym,
					Detail: fmt.Sprintf("$%v", ordSym),
					Row:    pos.Row,
					Col:    pos.Col,
				})
			}
		} else {
			// A production inherits precedence and associativity from the
			// right-most terminal symbol.
			mostrightTerm := symbol.SymbolNil
			for _, sym := range prod.rhs {
				if sym.IsTerminal() {
					mostrightTerm = sym
				}
			}
			if !mostrightTerm.IsNil() {
				// A terminal absent from the tables yields the zero values of
				// the map element types.
				prodPrec[prod.num] = termPrec[mostrightTerm.Num()]
				prodAssoc[prod.num] = termAssoc[mostrightTerm.Num()]
			}
		}
	}
	if len(b.errs) > 0 {
		return nil, nil
	}

	return &precAndAssoc{
		termPrec:  termPrec,
		termAssoc: termAssoc,
		prodPrec:  prodPrec,
		prodAssoc: prodAssoc,
	}, nil
}

// compile turns a Grammar into a spec.CompiledGrammar.
//
// It compiles the lexical specification, maps lexical kinds to terminal
// symbols, builds the FIRST set and the LR(0)/LALR(1) automata, generates the
// parsing table, and flattens the per-production data (LHS symbols, RHS
// lengths, recover flags, AST actions) into slices indexed by production
// number.
//
// The returned report is non-nil only when reporting was enabled through opts.
// On failure both results are nil and the error describes the cause; lexical
// compile errors are joined into one message, one error per line.
func compile(gram *Grammar, opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) {
	config := &buildConfig{}
	for _, opt := range opts {
		opt(config)
	}

	lexSpec, err, cErrs := lexical.Compile(gram.lexSpec, lexical.CompressionLevelMax)
	if err != nil {
		if len(cErrs) == 0 {
			return nil, nil, err
		}
		var b strings.Builder
		writeCompileError(&b, cErrs[0])
		for _, cerr := range cErrs[1:] {
			b.WriteString("\n")
			writeCompileError(&b, cerr)
		}
		// Use a constant format string: the message is assembled from the
		// specification and may contain '%', which must not be interpreted
		// as a formatting verb.
		return nil, nil, fmt.Errorf("%s", b.String())
	}

	kind2Term := make([]int, len(lexSpec.KindNames))
	for i, k := range lexSpec.KindNames {
		if k == spec.LexKindNameNil {
			kind2Term[spec.LexKindIDNil] = symbol.SymbolNil.Num().Int()
			continue
		}

		sym, ok := gram.symbolTable.ToSymbol(k.String())
		if !ok {
			return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k)
		}
		kind2Term[i] = sym.Num().Int()
	}

	termTexts, err := gram.symbolTable.TerminalTexts()
	if err != nil {
		return nil, nil, err
	}

	var termSkip []int
	{
		r := gram.symbolTable.Reader()
		// It would be preferable to iterate over terminal symbols directly,
		// but per the original author's note that list differs in length from
		// the terminal texts because it lacks predefined symbols such as EOF.
		// The texts are therefore used, at the cost of a lookup per entry.
		termSkip = make([]int, len(termTexts))
		for _, t := range termTexts {
			// NOTE(review): the lookup result flag is ignored; presumably a
			// failed lookup yields a symbol that never equals a skip symbol.
			// Confirm against the symbol table implementation.
			s, _ := r.ToSymbol(t)
			for _, sk := range gram.skipSymbols {
				if s == sk {
					termSkip[s.Num()] = 1
					break
				}
			}
		}
	}

	nonTerms, err := gram.symbolTable.NonTerminalTexts()
	if err != nil {
		return nil, nil, err
	}

	firstSet, err := genFirstSet(gram.productionSet)
	if err != nil {
		return nil, nil, err
	}

	lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol)
	if err != nil {
		return nil, nil, err
	}

	var tab *ParsingTable
	var report *spec.Report
	{
		lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet)
		if err != nil {
			return nil, nil, err
		}

		b := &lrTableBuilder{
			automaton:    lalr1.lr0Automaton,
			prods:        gram.productionSet,
			termCount:    len(termTexts),
			nonTermCount: len(nonTerms),
			symTab:       gram.symbolTable,
			precAndAssoc: gram.precAndAssoc,
		}
		tab, err = b.build()
		if err != nil {
			return nil, nil, err
		}

		if config.isReportingEnabled {
			report, err = b.genReport(tab, gram)
			if err != nil {
				return nil, nil, err
			}
		}
	}

	action := make([]int, len(tab.actionTable))
	for i, e := range tab.actionTable {
		action[i] = int(e)
	}
	goTo := make([]int, len(tab.goToTable))
	for i, e := range tab.goToTable {
		goTo[i] = int(e)
	}

	// Fetch the productions once. The slices below are indexed by production
	// number and sized with one extra slot, as in the original sizing.
	allProds := gram.productionSet.getAllProductions()
	slots := len(allProds) + 1
	lhsSyms := make([]int, slots)
	altSymCounts := make([]int, slots)
	recoverProds := make([]int, slots)
	astActEntries := make([][]int, slots)
	for _, p := range allProds {
		lhsSyms[p.num] = p.lhs.Num().Int()
		altSymCounts[p.num] = p.rhsLen

		if _, ok := gram.recoverProductions[p.id]; ok {
			recoverProds[p.num] = 1
		}

		astAct, ok := gram.astActions[p.id]
		if !ok {
			continue
		}
		astActEntry := make([]int, len(astAct))
		for i, e := range astAct {
			// An expansion entry is encoded as the negated position.
			if e.expansion {
				astActEntry[i] = e.position * -1
			} else {
				astActEntry[i] = e.position
			}
		}
		astActEntries[p.num] = astActEntry
	}

	return &spec.CompiledGrammar{
		Name:    gram.name,
		Lexical: lexSpec,
		Syntactic: &spec.SyntacticSpec{
			Action:                  action,
			GoTo:                    goTo,
			StateCount:              tab.stateCount,
			InitialState:            tab.InitialState.Int(),
			StartProduction:         productionNumStart.Int(),
			LHSSymbols:              lhsSyms,
			AlternativeSymbolCounts: altSymCounts,
			Terminals:               termTexts,
			TerminalCount:           tab.terminalCount,
			TerminalSkip:            termSkip,
			KindToTerminal:          kind2Term,
			NonTerminals:            nonTerms,
			NonTerminalCount:        tab.nonTerminalCount,
			EOFSymbol:               symbol.SymbolEOF.Num().Int(),
			ErrorSymbol:             gram.errorSymbol.Num().Int(),
			ErrorTrapperStates:      tab.errorTrapperStates,
			RecoverProductions:      recoverProds,
		},
		ASTAction: &spec.ASTAction{
			Entries: astActEntries,
		},
	}, report, nil
}

// writeCompileError writes a one-line description of cErr to w in the form
// "[fragment ]<kind>: <cause>[: <detail>]". Write errors are not reported.
func writeCompileError(w io.Writer, cErr *lexical.CompileError) {
	if cErr.Fragment {
		fmt.Fprint(w, "fragment ")
	}
	fmt.Fprintf(w, "%v: %v", cErr.Kind, cErr.Cause)
	if cErr.Detail != "" {
		fmt.Fprintf(w, ": %v", cErr.Detail)
	}
}

diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go deleted file mode 100644 index 4a3c6a0..0000000 --- a/grammar/grammar_test.go +++ /dev/null @@ -1,3381 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - verr "error" - "spec/grammar/parser" -) - -func TestGrammarBuilderOK(t *testing.T) { - type okTest struct { - caption string - specSrc string - validate func(t *testing.T, g *Grammar) - } - - nameTests := []*okTest{ - { - caption: "the `#name` can be the same identifier as a non-terminal symbol", - specSrc: ` -#name s; - -s - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "s" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as a terminal symbol", - specSrc: ` 
-#name foo; - -s - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "foo" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as the error symbol", - specSrc: ` -#name error; - -s - : foo - | error - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "error" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - { - caption: "the `#name` can be the same identifier as a fragment", - specSrc: ` -#name f; - -s - : foo - ; - -foo - : "\f{f}"; -fragment f - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - expected := "f" - if g.name != expected { - t.Fatalf("unexpected name: want: %v, got: %v", expected, g.name) - } - }, - }, - } - - modeTests := []*okTest{ - { - caption: "a `#mode` can be the same identifier as a non-terminal symbol", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push s - : 'foo'; -bar #mode s - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "s" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as a terminal symbol", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push bar - : 'foo'; -bar #mode bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "bar" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as the error symbol", - specSrc: ` -#name 
test; - -s - : foo bar - | error - ; - -foo #push error - : 'foo'; -bar #mode error - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "error" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - { - caption: "a `#mode` can be the same identifier as a fragment", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push f - : "\f{f}"; -bar #mode f - : 'bar'; -fragment f - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - kind := "bar" - expectedMode := "f" - for _, e := range g.lexSpec.Entries { - if e.Kind.String() == kind && e.Modes[0].String() == expectedMode { - return - } - } - t.Fatalf("symbol having expected mode was not found: want: %v #mode %v", kind, expectedMode) - }, - }, - } - - precTests := []*okTest{ - { - caption: "a `#prec` allows the empty directive group", - specSrc: ` -#name test; - -#prec (); - -s - : foo - ; - -foo - : 'foo'; -`, - }, - { - caption: "a `#left` directive gives a precedence and the left associativity to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #left foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = 
g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "a `#right` directive gives a precedence and the right associativity to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #right foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeRight, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = 
g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "an `#assign` directive gives only a precedence to specified terminal symbols", - specSrc: ` -#name test; - -#prec ( - #assign foo bar -); - -s - : foo bar baz - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, barPrec, barAssoc) - } - var bazPrec int - var bazAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("baz") - bazPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - bazAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if bazPrec != precNil || bazAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, bazPrec, bazAssoc) - } - }, - }, - { - caption: "a production has the same precedence and associativity as the right-most terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : foo bar // 
This alternative has the same precedence and associativity as the right-most terminal symbol 'bar', not 'foo'. - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if barPrec != precNil || barAssoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, barPrec, barAssoc) - } - if sPrec != barPrec || sAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "a production has the same precedence and associativity as the right-most terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo bar // This alternative has the same precedence and associativity as the right-most terminal symbol 'bar'. 
- ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if sPrec != barPrec || sAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "even if a non-terminal symbol apears to a terminal symbol, a production inherits precedence and associativity from the right-most terminal symbol, not from the non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo a // This alternative has the same precedence and associativity as the right-most terminal symbol 'foo', not 'a'. 
- ; -a - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var aPrec int - var aAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("a") - ps, _ := g.productionSet.findByLHS(s) - aPrec = g.precAndAssoc.productionPredence(ps[0].num) - aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if aPrec != barPrec || aAssoc != barAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", barPrec, barAssoc, aPrec, aAssoc) - } - if sPrec != fooPrec || sAssoc != fooAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, sPrec, sAssoc) - } - }, - }, - { - caption: "each alternative in the same production can have its own precedence and associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right 
bar - #assign baz -); - -s - : foo - | bar - | baz - | bra - ; - -foo - : 'foo'; -bar - : 'bar'; -baz - : 'baz'; -bra - : 'bra'; -`, - validate: func(t *testing.T, g *Grammar) { - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - var alt3Prec int - var alt3Assoc assocType - var alt4Prec int - var alt4Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - alt3Prec = g.precAndAssoc.productionPredence(ps[2].num) - alt3Assoc = g.precAndAssoc.productionAssociativity(ps[2].num) - alt4Prec = g.precAndAssoc.productionPredence(ps[3].num) - alt4Assoc = g.precAndAssoc.productionAssociativity(ps[3].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeLeft { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeRight { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, alt2Prec, alt2Assoc) - } - if alt3Prec != 3 || alt3Assoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt3Prec, alt3Assoc) - } - if alt4Prec != precNil || alt4Assoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt4Prec, alt4Assoc) - } - }, - }, - { - caption: "when a production contains no terminal symbols, the production will not have precedence and associativiry", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - 
: a - ; -a - : foo - ; - -foo - : 'foo'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var aPrec int - var aAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("a") - ps, _ := g.productionSet.findByLHS(s) - aPrec = g.precAndAssoc.productionPredence(ps[0].num) - aAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if aPrec != fooPrec || aAssoc != fooAssoc { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, aPrec, aAssoc) - } - if sPrec != precNil || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", - specSrc: ` -#name test; - -#prec ( - #left foo -); - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := 
g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if sPrec != fooPrec || sAssoc != assocTypeNil { - t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "the `#prec` directive applied to an alternative changes only precedence, not associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right bar -); - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var sPrec int - var sAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - sPrec = g.precAndAssoc.productionPredence(ps[0].num) - sAssoc = g.precAndAssoc.productionAssociativity(ps[0].num) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - if sPrec != fooPrec || sAssoc != assocTypeNil { - 
t.Fatalf("unexpected production precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, assocTypeNil, sPrec, sAssoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#left` directive", - specSrc: ` -#name test; - -#prec ( - #left $high - #right foo bar - #left $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 2 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeRight, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 3 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected 
terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#right` directive", - specSrc: ` -#name test; - -#prec ( - #right $high - #left foo bar - #right $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 2 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 3 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and 
associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "an ordered symbol can appear in a `#assign` directive", - specSrc: ` -#name test; - -#prec ( - #assign $high - #left foo - #right bar - #assign $low -); - -s - : foo #prec $high - | bar #prec $low - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 2 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeLeft, fooPrec, fooAssoc) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 3 || barAssoc != assocTypeRight { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 3, assocTypeRight, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != 1 || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 4 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and 
associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 4, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "names of an ordered symbol and a terminal symbol can duplicate", - specSrc: ` -#name test; - -#prec ( - #left foo bar - #right $foo -); - -s - : foo - | bar #prec $foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var fooPrec int - var fooAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("foo") - fooPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - fooAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if fooPrec != 1 || fooAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, fooPrec, fooAssoc) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != fooPrec || alt1Assoc != fooAssoc { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", fooPrec, fooAssoc, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: 
%v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - { - caption: "names of an ordered symbol and a non-terminal symbol can duplicate", - specSrc: ` -#name test; - -#prec ( - #left foo bar - #right $a -); - -s - : a - | bar #prec $a - ; -a - : foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - validate: func(t *testing.T, g *Grammar) { - var barPrec int - var barAssoc assocType - { - s, _ := g.symbolTable.ToSymbol("bar") - barPrec = g.precAndAssoc.terminalPrecedence(s.Num()) - barAssoc = g.precAndAssoc.terminalAssociativity(s.Num()) - } - if barPrec != 1 || barAssoc != assocTypeLeft { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 1, assocTypeLeft, barPrec, barAssoc) - } - var alt1Prec int - var alt1Assoc assocType - var alt2Prec int - var alt2Assoc assocType - { - s, _ := g.symbolTable.ToSymbol("s") - ps, _ := g.productionSet.findByLHS(s) - alt1Prec = g.precAndAssoc.productionPredence(ps[0].num) - alt1Assoc = g.precAndAssoc.productionAssociativity(ps[0].num) - alt2Prec = g.precAndAssoc.productionPredence(ps[1].num) - alt2Assoc = g.precAndAssoc.productionAssociativity(ps[1].num) - } - if alt1Prec != precNil || alt1Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", precNil, assocTypeNil, alt1Prec, alt1Assoc) - } - if alt2Prec != 2 || alt2Assoc != assocTypeNil { - t.Fatalf("unexpected terminal precedence and associativity: want: (prec: %v, assoc: %v), got: (prec: %v, assoc: %v)", 2, assocTypeNil, alt2Prec, alt2Assoc) - } - }, - }, - } - - var tests []*okTest - tests = append(tests, nameTests...) - tests = append(tests, modeTests...) - tests = append(tests, precTests...) 
- - for _, test := range tests { - t.Run(test.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(test.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - g, err := b.build() - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if test.validate != nil { - test.validate(t, g) - } - }) - } -} - -func TestGrammarBuilderSpecError(t *testing.T) { - type specErrTest struct { - caption string - specSrc string - errs []error - } - - spellingInconsistenciesTests := []*specErrTest{ - { - caption: "a spelling inconsistency appears among non-terminal symbols", - specSrc: ` -#name test; - -a1 - : a_1 - ; -a_1 - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among terminal symbols", - specSrc: ` -#name test; - -s - : foo1 foo_1 - ; - -foo1 - : 'foo1'; -foo_1 - : 'foo_1'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among non-terminal and terminal symbols", - specSrc: ` -#name test; - -a1 - : a_1 - ; - -a_1 - : 'a_1'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among ordered symbols whose precedence is the same", - specSrc: ` -#name test; - -#prec ( - #assign $p1 $p_1 -); - -s - : foo #prec $p1 - | bar #prec $p_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among ordered symbols whose precedence is not the same", - specSrc: ` -#name test; - -#prec ( - #assign $p1 - #assign $p_1 -); - -s - : foo #prec $p1 - | bar #prec $p_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels the same alternative contains", - specSrc: ` -#name test; - -s - : foo@l1 foo@l_1 - ; - -foo - : 'foo'; -`, - errs: 
[]error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels the same production contains", - specSrc: ` -#name test; - -s - : foo@l1 - | bar@l_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - { - caption: "a spelling inconsistency appears among labels different productions contain", - specSrc: ` -#name test; - -s - : foo@l1 - ; -a - : bar@l_1 - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrSpellingInconsistency}, - }, - } - - prodTests := []*specErrTest{ - { - caption: "a production `b` is unused", - specSrc: ` -#name test; - -a - : foo - ; -b - : foo - ; - -foo - : "foo"; -`, - errs: []error{semErrUnusedProduction}, - }, - { - caption: "a terminal symbol `bar` is unused", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrUnusedTerminal}, - }, - { - caption: "a production `b` and terminal symbol `bar` is unused", - specSrc: ` -#name test; - -a - : foo - ; -b - : bar - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{ - semErrUnusedProduction, - semErrUnusedTerminal, - }, - }, - { - caption: "a production cannot have production directives", - specSrc: ` -#name test; - -s #prec foo - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrInvalidProdDir}, - }, - { - caption: "a lexical production cannot have alternative directives", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo' #skip; -`, - errs: []error{semErrInvalidAltDir}, - }, - { - caption: "a production directive must not be duplicated", - specSrc: ` -#name test; - -s - : foo - ; - -foo #skip #skip - : 'foo'; -`, - errs: []error{semErrDuplicateDir}, - }, - { - caption: "an alternative directive must not be duplicated", - specSrc: ` -#name test; - -s - : foo bar #ast foo bar #ast foo bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateDir}, - }, - { - caption: "a production must not have a duplicate 
alternative (non-empty alternatives)", - specSrc: ` -#name test; - -s - : foo - | foo - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (non-empty and split alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : bar - ; -s - : foo - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (empty alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : - | - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a production must not have a duplicate alternative (empty and split alternatives)", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : - | foo - ; -a - : - ; - -foo - : "foo"; -`, - errs: []error{semErrDuplicateProduction}, - }, - { - caption: "a terminal symbol and a non-terminal symbol (start symbol) are duplicates", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "foo"; -s - : "a"; -`, - errs: []error{semErrDuplicateName}, - }, - { - caption: "a terminal symbol and a non-terminal symbol (not start symbol) are duplicates", - specSrc: ` -#name test; - -s - : foo - | a - ; -a - : bar - ; - -foo - : "foo"; -bar - : "bar"; -a - : "a"; -`, - errs: []error{semErrDuplicateName}, - }, - { - caption: "an invalid top-level directive", - specSrc: ` -#name test; - -#foo; - -s - : a - ; - -a - : 'a'; -`, - errs: []error{semErrDirInvalidName}, - }, - { - caption: "a label must be unique in an alternative", - specSrc: ` -#name test; - -s - : foo@x bar@x - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateLabel}, - }, - { - caption: "a label cannot be the same name as terminal symbols", - specSrc: ` -#name test; - -s - : foo bar@foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDuplicateLabel}, - }, - { - caption: "a label cannot be the same name as non-terminal 
symbols", - specSrc: ` -#name test; - -s - : foo@a - | a - ; -a - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{ - semErrInvalidLabel, - }, - }, - } - - nameDirTests := []*specErrTest{ - { - caption: "the `#name` directive is required", - specSrc: ` -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrNoGrammarName}, - }, - { - caption: "the `#name` directive needs an ID parameter", - specSrc: ` -#name; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive cannot take a pattern parameter", - specSrc: ` -#name "test"; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive cannot take a string parameter", - specSrc: ` -#name 'test'; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#name` directive takes just one parameter", - specSrc: ` -#name test1 test2; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - precDirTests := []*specErrTest{ - { - caption: "the `#prec` directive needs a directive group parameter", - specSrc: ` -#name test; - -#prec; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an ID parameter", - specSrc: ` -#name test; - -#prec foo; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec $x; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec "foo"; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec 
'foo'; - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive takes just one directive group parameter", - specSrc: ` -#name test; - -#prec () (); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - leftDirTests := []*specErrTest{ - { - caption: "the `#left` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #left -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #left error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #left x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #left "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #left 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` directive cannot take a directive parameter", - specSrc: ` -#name test; - -#prec ( - #left () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#left` dirctive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #left foo foo -); 
- -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#left` dirctive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #left $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #left foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #left $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #right foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #right $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - rightDirTests := []*specErrTest{ - { - caption: "the `#right` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #right -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #right error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #right x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a non-terminal 
symbol", - specSrc: ` -#name test; - -#prec ( - #right s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #right "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #right 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -#prec ( - #right () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#right` directive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #right foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "the `#right` directive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #right $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #right foo - #right foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #right $x - #right $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #left foo - #right foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different 
associativity", - specSrc: ` -#name test; - -#prec ( - #left $x - #right $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - assignDirTests := []*specErrTest{ - { - caption: "the `#assign` directive needs ID parameters", - specSrc: ` -#name test; - -#prec ( - #assign -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -#prec ( - #assign error -); - -s - : foo semi_colon - | error semi_colon - ; - -foo - : 'foo'; -semi_colon - : ';'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -#prec ( - #assign x -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -#prec ( - #assign s -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -#prec ( - #assign "foo" -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a string parameter", - specSrc: ` -#name test; - -#prec ( - #assign 'foo' -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` directive cannot take a directive parameter", - specSrc: ` -#name test; - -#prec ( - #assign () -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#assign` dirctive cannot be specified multiple times for a terminal symbol", - specSrc: ` -#name test; - -#prec ( - #assign foo foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - 
caption: "the `#assign` dirctive cannot be specified multiple times for an ordered symbol", - specSrc: ` -#name test; - -#prec ( - #assign $x $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #assign foo - #assign foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different precedence", - specSrc: ` -#name test; - -#prec ( - #assign $x - #assign $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "a terminal symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #assign foo - #left foo -); - -s - : foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - { - caption: "an ordered symbol cannot have different associativity", - specSrc: ` -#name test; - -#prec ( - #assign $x - #left $x -); - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateAssoc}, - }, - } - - errorSymTests := []*specErrTest{ - { - caption: "cannot use the error symbol as a non-terminal symbol", - specSrc: ` -#name test; - -s - : error - ; -error - : foo - ; - -foo: 'foo'; -`, - errs: []error{ - semErrErrSymIsReserved, - semErrDuplicateName, - }, - }, - { - caption: "cannot use the error symbol as a terminal symbol", - specSrc: ` -#name test; - -s - : error - ; - -error: 'error'; -`, - errs: []error{semErrErrSymIsReserved}, - }, - { - caption: "cannot use the error symbol as a terminal symbol, even if given the skip directive", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : 'foo'; -error #skip - : 'error'; -`, - errs: []error{semErrErrSymIsReserved}, - }, - } - - astDirTests := []*specErrTest{ - { - caption: "the `#ast` directive needs ID or label prameters", - specSrc: ` -#name test; - -s - : foo #ast - ; - -foo - : 
"foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo #ast $x - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #ast "foo" - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #ast 'foo' - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#ast` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo #ast () - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a parameter of the `#ast` directive must be either a symbol or a label in an alternative", - specSrc: ` -#name test; - -s - : foo bar #ast foo x - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol in a different alternative cannot be a parameter of the `#ast` directive", - specSrc: ` -#name test; - -s - : foo #ast bar - | bar - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a label in a different alternative cannot be a parameter of the `#ast` directive", - specSrc: ` -#name test; - -s - : foo #ast b - | bar@b - ; - -foo - : "foo"; -bar - : "bar"; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol can appear in the `#ast` directive only once", - specSrc: ` -#name test; - -s - : foo #ast foo foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "a label can appear in the `#ast` directive only once", - specSrc: ` -#name test; - -s - : foo@x #ast x x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "a 
symbol can appear in the `#ast` directive only once, even if the symbol has a label", - specSrc: ` -#name test; - -s - : foo@x #ast foo x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDuplicateElem}, - }, - { - caption: "symbol `foo` is ambiguous because it appears in an alternative twice", - specSrc: ` -#name test; - -s - : foo foo #ast foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrAmbiguousElem}, - }, - { - caption: "symbol `foo` is ambiguous because it appears in an alternative twice, even if one of them has a label", - specSrc: ` -#name test; - -s - : foo@x foo #ast foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrAmbiguousElem}, - }, - { - caption: "the expansion operator cannot be applied to a terminal symbol", - specSrc: ` -#name test; - -s - : foo #ast foo... - ; - -foo - : "foo"; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - altPrecDirTests := []*specErrTest{ - { - caption: "the `#prec` directive needs an ID parameter or an ordered symbol parameter", - specSrc: ` -#name test; - -s - : foo #prec - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot be applied to an error symbol", - specSrc: ` -#name test; - -s - : foo #prec error - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an undefined symbol", - specSrc: ` -#name test; - -s - : foo #prec x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a non-terminal symbol", - specSrc: ` -#name test; - -s - : a #prec b - | b - ; -a - : foo - ; -b - : bar - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take an undefined ordered symbol parameter", - specSrc: ` -#name test; - -s - : foo #prec $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrUndefinedOrdSym}, - }, - { - caption: "the `#prec` directive cannot 
take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #prec "foo" - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #prec 'foo' - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#prec` directive cannot take a directive parameter", - specSrc: ` -#name test; - -s - : foo #prec () - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a symbol the `#prec` directive takes must be given precedence explicitly", - specSrc: ` -#name test; - -s - : foo bar #prec foo - ; - -foo - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrUndefinedPrec}, - }, - } - - recoverDirTests := []*specErrTest{ - { - caption: "the `#recover` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo #recover foo - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo #recover $x - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo #recover "foo" - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo #recover 'foo' - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#recover` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo #recover () - ; - -foo - : 'foo'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - fragmentTests := []*specErrTest{ - { - caption: "a production cannot contain a fragment", - specSrc: ` -#name test; - -s - : f - ; - -fragment f - : 
'fragment'; -`, - errs: []error{semErrUndefinedSym}, - }, - { - caption: "fragments cannot be duplicated", - specSrc: ` -#name test; - -s - : foo - ; - -foo - : "\f{f}"; -fragment f - : 'fragment 1'; -fragment f - : 'fragment 2'; -`, - errs: []error{semErrDuplicateFragment}, - }, - } - - modeDirTests := []*specErrTest{ - { - caption: "the `#mode` directive needs an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo - : 'foo'; -bar #mode $x - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode "mode_1" - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode 'mode_1' - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#mode` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 - : 'foo'; -bar #mode () - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - pushDirTests := []*specErrTest{ - { - caption: "the `#push` directive needs an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive takes just one ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push mode_1 mode_2 - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot 
take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo #push $x - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push "mode_1" - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push 'mode_1' - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#push` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #push () - : 'foo'; -bar #mode mode_1 - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - popDirTests := []*specErrTest{ - { - caption: "the `#pop` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop mode_1 - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop $x - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop "mode_1" - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#pop` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop 'mode_1' - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, 
- { - caption: "the `#pop` directive cannot take a directive parameter", - specSrc: ` -#name test; - -s - : foo bar baz - ; - -foo #push mode_1 - : 'foo'; -bar #mode mode_1 - : 'bar'; -baz #pop () - : 'baz'; -`, - errs: []error{semErrDirInvalidParam}, - }, - } - - skipDirTests := []*specErrTest{ - { - caption: "the `#skip` directive cannot take an ID parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip bar - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take an ordered symbol parameter", - specSrc: ` -#name test; - -#prec ( - #assign $x -); - -s - : foo bar - ; - -foo #skip $x - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a pattern parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip "bar" - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a string parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip 'bar' - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "the `#skip` directive cannot take a directive group parameter", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip () - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrDirInvalidParam}, - }, - { - caption: "a terminal symbol used in productions cannot have the skip directive", - specSrc: ` -#name test; - -s - : foo bar - ; - -foo #skip - : 'foo'; -bar - : 'bar'; -`, - errs: []error{semErrTermCannotBeSkipped}, - }, - } - - var tests []*specErrTest - tests = append(tests, spellingInconsistenciesTests...) - tests = append(tests, prodTests...) - tests = append(tests, nameDirTests...) - tests = append(tests, precDirTests...) - tests = append(tests, leftDirTests...) - tests = append(tests, rightDirTests...) - tests = append(tests, assignDirTests...) 
- tests = append(tests, errorSymTests...) - tests = append(tests, astDirTests...) - tests = append(tests, altPrecDirTests...) - tests = append(tests, recoverDirTests...) - tests = append(tests, fragmentTests...) - tests = append(tests, modeDirTests...) - tests = append(tests, pushDirTests...) - tests = append(tests, popDirTests...) - tests = append(tests, skipDirTests...) - for _, test := range tests { - t.Run(test.caption, func(t *testing.T) { - ast, err := parser.Parse(strings.NewReader(test.specSrc)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - _, err = b.build() - if err == nil { - t.Fatal("an expected error didn't occur") - } - specErrs, ok := err.(verr.SpecErrors) - if !ok { - t.Fatalf("unexpected error type: want: %T, got: %T: %v", verr.SpecErrors{}, err, err) - } - if len(specErrs) != len(test.errs) { - t.Fatalf("unexpected spec error count: want: %+v, got: %+v", test.errs, specErrs) - } - for _, expected := range test.errs { - for _, actual := range specErrs { - if actual.Cause == expected { - return - } - } - } - t.Fatalf("an expected spec error didn't occur: want: %v, got: %+v", test.errs, specErrs) - }) - } -} diff --git a/grammar/item.go b/grammar/item.go deleted file mode 100644 index aa1ecaf..0000000 --- a/grammar/item.go +++ /dev/null @@ -1,206 +0,0 @@ -package grammar - -import ( - "crypto/sha256" - "encoding/binary" - "fmt" - "sort" - "strconv" - - "grammar/symbol" -) - -type lrItemID [32]byte - -func (id lrItemID) String() string { - return fmt.Sprintf("%x", id.num()) -} - -func (id lrItemID) num() uint32 { - return binary.LittleEndian.Uint32(id[:]) -} - -type lookAhead struct { - symbols map[symbol.Symbol]struct{} - - // When propagation is true, an item propagates look-ahead symbols to other items. 
- propagation bool -} - -type lrItem struct { - id lrItemID - prod productionID - - // E → E + T - // - // Dot | Dotted Symbol | Item - // ----+---------------+------------ - // 0 | E | E →・E + T - // 1 | + | E → E・+ T - // 2 | T | E → E +・T - // 3 | Nil | E → E + T・ - dot int - dottedSymbol symbol.Symbol - - // When initial is true, the LHS of the production is the augmented start symbol and dot is 0. - // It looks like S' →・S. - initial bool - - // When reducible is true, the item looks like E → E + T・. - reducible bool - - // When kernel is true, the item is kernel item. - kernel bool - - // lookAhead stores look-ahead symbols, and they are terminal symbols. - // The item is reducible only when the look-ahead symbols appear as the next input symbol. - lookAhead lookAhead -} - -func newLR0Item(prod *production, dot int) (*lrItem, error) { - if prod == nil { - return nil, fmt.Errorf("production must be non-nil") - } - - if dot < 0 || dot > prod.rhsLen { - return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen) - } - - var id lrItemID - { - b := []byte{} - b = append(b, prod.id[:]...) - bDot := make([]byte, 8) - binary.LittleEndian.PutUint64(bDot, uint64(dot)) - b = append(b, bDot...) 
- id = sha256.Sum256(b) - } - - dottedSymbol := symbol.SymbolNil - if dot < prod.rhsLen { - dottedSymbol = prod.rhs[dot] - } - - initial := false - if prod.lhs.IsStart() && dot == 0 { - initial = true - } - - reducible := false - if dot == prod.rhsLen { - reducible = true - } - - kernel := false - if initial || dot > 0 { - kernel = true - } - - item := &lrItem{ - id: id, - prod: prod.id, - dot: dot, - dottedSymbol: dottedSymbol, - initial: initial, - reducible: reducible, - kernel: kernel, - } - - return item, nil -} - -type kernelID [32]byte - -func (id kernelID) String() string { - return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:])) -} - -type kernel struct { - id kernelID - items []*lrItem -} - -func newKernel(items []*lrItem) (*kernel, error) { - if len(items) == 0 { - return nil, fmt.Errorf("a kernel need at least one item") - } - - // Remove duplicates from items. - var sortedItems []*lrItem - { - m := map[lrItemID]*lrItem{} - for _, item := range items { - if !item.kernel { - return nil, fmt.Errorf("not a kernel item: %v", item) - } - m[item.id] = item - } - sortedItems = []*lrItem{} - for _, item := range m { - sortedItems = append(sortedItems, item) - } - sort.Slice(sortedItems, func(i, j int) bool { - return sortedItems[i].id.num() < sortedItems[j].id.num() - }) - } - - var id kernelID - { - b := []byte{} - for _, item := range sortedItems { - b = append(b, item.id[:]...) - } - id = sha256.Sum256(b) - } - - return &kernel{ - id: id, - items: sortedItems, - }, nil -} - -type stateNum int - -const stateNumInitial = stateNum(0) - -func (n stateNum) Int() int { - return int(n) -} - -func (n stateNum) String() string { - return strconv.Itoa(int(n)) -} - -func (n stateNum) next() stateNum { - return stateNum(n + 1) -} - -type lrState struct { - *kernel - num stateNum - next map[symbol.Symbol]kernelID - reducible map[productionID]struct{} - - // emptyProdItems stores items that have an empty production like `p → ε` and is reducible. 
- // Thus the items emptyProdItems stores are like `p → ・ε`. emptyProdItems is needed to store - // look-ahead symbols because the kernel items don't include these items. - // - // For instance, we have the following productions, and A is a terminal symbol. - // - // s' → s - // s → A | ε - // - // CLOSURE({s' → ・s}) generates the following closure, but the kernel of this closure doesn't - // include `s → ・ε`. - // - // s' → ・s - // s → ・A - // s → ・ε - emptyProdItems []*lrItem - - // When isErrorTrapper is `true`, the item can shift the `error` symbol. The item has the following form. - // The `α` and `β` can be empty. - // - // A → α・error β - isErrorTrapper bool -} diff --git a/grammar/lalr1.go b/grammar/lalr1.go deleted file mode 100644 index 4c7265d..0000000 --- a/grammar/lalr1.go +++ /dev/null @@ -1,318 +0,0 @@ -package grammar - -import ( - "fmt" - - "grammar/symbol" -) - -type stateAndLRItem struct { - kernelID kernelID - itemID lrItemID -} - -type propagation struct { - src *stateAndLRItem - dest []*stateAndLRItem -} - -type lalr1Automaton struct { - *lr0Automaton -} - -func genLALR1Automaton(lr0 *lr0Automaton, prods *productionSet, first *firstSet) (*lalr1Automaton, error) { - // Set the look-ahead symbol <EOF> to the initial item: [S' → ・S, $] - iniState := lr0.states[lr0.initialState] - iniState.items[0].lookAhead.symbols = map[symbol.Symbol]struct{}{ - symbol.SymbolEOF: {}, - } - - var props []*propagation - for _, state := range lr0.states { - for _, kItem := range state.items { - items, err := genLALR1Closure(kItem, prods, first) - if err != nil { - return nil, err - } - - kItem.lookAhead.propagation = true - - var propDests []*stateAndLRItem - for _, item := range items { - if item.reducible { - p, ok := prods.findByID(item.prod) - if !ok { - return nil, fmt.Errorf("production not found: %v", item.prod) - } - - if p.isEmpty() { - var reducibleItem *lrItem - for _, it := range state.emptyProdItems { - if it.id != item.id { - continue - } - - 
reducibleItem = it - break - } - if reducibleItem == nil { - return nil, fmt.Errorf("reducible item not found: %v", item.id) - } - if reducibleItem.lookAhead.symbols == nil { - reducibleItem.lookAhead.symbols = map[symbol.Symbol]struct{}{} - } - for a := range item.lookAhead.symbols { - reducibleItem.lookAhead.symbols[a] = struct{}{} - } - - propDests = append(propDests, &stateAndLRItem{ - kernelID: state.id, - itemID: item.id, - }) - } - - continue - } - - nextKID := state.next[item.dottedSymbol] - var nextItemID lrItemID - { - p, ok := prods.findByID(item.prod) - if !ok { - return nil, fmt.Errorf("production not found: %v", item.prod) - } - it, err := newLR0Item(p, item.dot+1) - if err != nil { - return nil, fmt.Errorf("failed to generate an item ID: %v", err) - } - nextItemID = it.id - } - - if item.lookAhead.propagation { - propDests = append(propDests, &stateAndLRItem{ - kernelID: nextKID, - itemID: nextItemID, - }) - } else { - nextState := lr0.states[nextKID] - var nextItem *lrItem - for _, it := range nextState.items { - if it.id != nextItemID { - continue - } - nextItem = it - break - } - if nextItem == nil { - return nil, fmt.Errorf("item not found: %v", nextItemID) - } - - if nextItem.lookAhead.symbols == nil { - nextItem.lookAhead.symbols = map[symbol.Symbol]struct{}{} - } - - for a := range item.lookAhead.symbols { - nextItem.lookAhead.symbols[a] = struct{}{} - } - } - } - if len(propDests) == 0 { - continue - } - - props = append(props, &propagation{ - src: &stateAndLRItem{ - kernelID: state.id, - itemID: kItem.id, - }, - dest: propDests, - }) - } - } - - err := propagateLookAhead(lr0, props) - if err != nil { - return nil, fmt.Errorf("failed to propagate look-ahead symbols: %v", err) - } - - return &lalr1Automaton{ - lr0Automaton: lr0, - }, nil -} - -func genLALR1Closure(srcItem *lrItem, prods *productionSet, first *firstSet) ([]*lrItem, error) { - items := []*lrItem{} - knownItems := map[lrItemID]map[symbol.Symbol]struct{}{} - knownItemsProp := 
map[lrItemID]struct{}{} - uncheckedItems := []*lrItem{} - items = append(items, srcItem) - uncheckedItems = append(uncheckedItems, srcItem) - for len(uncheckedItems) > 0 { - nextUncheckedItems := []*lrItem{} - for _, item := range uncheckedItems { - if item.dottedSymbol.IsTerminal() { - continue - } - - p, ok := prods.findByID(item.prod) - if !ok { - return nil, fmt.Errorf("production not found: %v", item.prod) - } - - var fstSyms []symbol.Symbol - var isFstNullable bool - { - fst, err := first.find(p, item.dot+1) - if err != nil { - return nil, err - } - - fstSyms = make([]symbol.Symbol, len(fst.symbols)) - i := 0 - for s := range fst.symbols { - fstSyms[i] = s - i++ - } - if fst.empty { - isFstNullable = true - } - } - - ps, _ := prods.findByLHS(item.dottedSymbol) - for _, prod := range ps { - var lookAhead []symbol.Symbol - { - var lookAheadCount int - if isFstNullable { - lookAheadCount = len(fstSyms) + len(item.lookAhead.symbols) - } else { - lookAheadCount = len(fstSyms) - } - - lookAhead = make([]symbol.Symbol, lookAheadCount) - i := 0 - for _, s := range fstSyms { - lookAhead[i] = s - i++ - } - if isFstNullable { - for a := range item.lookAhead.symbols { - lookAhead[i] = a - i++ - } - } - } - - for _, a := range lookAhead { - newItem, err := newLR0Item(prod, 0) - if err != nil { - return nil, err - } - if items, exist := knownItems[newItem.id]; exist { - if _, exist := items[a]; exist { - continue - } - } - - newItem.lookAhead.symbols = map[symbol.Symbol]struct{}{ - a: {}, - } - - items = append(items, newItem) - if knownItems[newItem.id] == nil { - knownItems[newItem.id] = map[symbol.Symbol]struct{}{} - } - knownItems[newItem.id][a] = struct{}{} - nextUncheckedItems = append(nextUncheckedItems, newItem) - } - - if isFstNullable { - newItem, err := newLR0Item(prod, 0) - if err != nil { - return nil, err - } - if _, exist := knownItemsProp[newItem.id]; exist { - continue - } - - newItem.lookAhead.propagation = true - - items = append(items, newItem) - 
knownItemsProp[newItem.id] = struct{}{} - nextUncheckedItems = append(nextUncheckedItems, newItem) - } - } - } - uncheckedItems = nextUncheckedItems - } - - return items, nil -} - -func propagateLookAhead(lr0 *lr0Automaton, props []*propagation) error { - for { - changed := false - for _, prop := range props { - srcState, ok := lr0.states[prop.src.kernelID] - if !ok { - return fmt.Errorf("source state not found: %v", prop.src.kernelID) - } - var srcItem *lrItem - for _, item := range srcState.items { - if item.id != prop.src.itemID { - continue - } - srcItem = item - break - } - if srcItem == nil { - return fmt.Errorf("source item not found: %v", prop.src.itemID) - } - - for _, dest := range prop.dest { - destState, ok := lr0.states[dest.kernelID] - if !ok { - return fmt.Errorf("destination state not found: %v", dest.kernelID) - } - var destItem *lrItem - for _, item := range destState.items { - if item.id != dest.itemID { - continue - } - destItem = item - break - } - if destItem == nil { - for _, item := range destState.emptyProdItems { - if item.id != dest.itemID { - continue - } - destItem = item - break - } - if destItem == nil { - return fmt.Errorf("destination item not found: %v", dest.itemID) - } - } - - for a := range srcItem.lookAhead.symbols { - if _, ok := destItem.lookAhead.symbols[a]; ok { - continue - } - - if destItem.lookAhead.symbols == nil { - destItem.lookAhead.symbols = map[symbol.Symbol]struct{}{} - } - - destItem.lookAhead.symbols[a] = struct{}{} - changed = true - } - } - } - if !changed { - break - } - } - - return nil -} diff --git a/grammar/lalr1_test.go b/grammar/lalr1_test.go deleted file mode 100644 index d08468d..0000000 --- a/grammar/lalr1_test.go +++ /dev/null @@ -1,187 +0,0 @@ -package grammar - -import ( - "strings" - "testing" - - "grammar/symbol" - "spec/grammar/parser" -) - -func TestGenLALR1Automaton(t *testing.T) { - // This grammar belongs to LALR(1) class, not SLR(1). 
- src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var gram *Grammar - var automaton *lalr1Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - - firstSet, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatalf("failed to create a FIRST set: %v", err) - } - - automaton, err = genLALR1Automaton(lr0, gram.productionSet, firstSet) - if err != nil { - t.Fatalf("failed to create a LALR1 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLALR1Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { 
- withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("eq"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "r"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[9], - genSym("l"): expectedKernels[8], - genSym("ref"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("l", "ref", "r"), - }, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("r", "l"), - }, - }, - { - 
kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "l", "eq", "r"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton.lr0Automaton) -} diff --git a/grammar/lexical/compiler.go b/grammar/lexical/compiler.go deleted file mode 100644 index 06e4068..0000000 --- a/grammar/lexical/compiler.go +++ /dev/null @@ -1,413 +0,0 @@ -package lexical - -import ( - "bytes" - "fmt" - - "compressor" - "grammar/lexical/dfa" - psr "grammar/lexical/parser" - spec "spec/grammar" -) - -type CompileError struct { - Kind spec.LexKindName - Fragment bool - Cause error - Detail string -} - -func Compile(lexspec *LexSpec, compLv int) (*spec.LexicalSpec, error, []*CompileError) { - err := lexspec.Validate() - if err != nil { - return nil, fmt.Errorf("invalid lexical specification:\n%w", err), nil - } - - modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries) - - modeSpecs := []*spec.CompiledLexModeSpec{ - nil, - } - for i, es := range modeEntries[1:] { - modeName := modeNames[i+1] - modeSpec, err, cerrs := compile(es, modeName2ID, fragmetns, compLv) - if err != nil { - return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err), cerrs - } - modeSpecs = append(modeSpecs, modeSpec) - } - - var kindNames []spec.LexKindName - var name2ID map[spec.LexKindName]spec.LexKindID - { - name2ID = map[spec.LexKindName]spec.LexKindID{} - id := spec.LexKindIDMin - for _, modeSpec := range modeSpecs[1:] { - for _, name := range modeSpec.KindNames[1:] { - if _, ok := name2ID[name]; ok { - continue - } - name2ID[name] = id - id++ - } - } - - kindNames = make([]spec.LexKindName, len(name2ID)+1) - for name, id := range name2ID { - kindNames[id] = name - } - } - - var kindIDs [][]spec.LexKindID - { - kindIDs = make([][]spec.LexKindID, len(modeSpecs)) - for i, modeSpec := range modeSpecs[1:] { - ids := make([]spec.LexKindID, len(modeSpec.KindNames)) - for modeID, name := range 
modeSpec.KindNames { - if modeID == 0 { - continue - } - ids[modeID] = name2ID[name] - } - kindIDs[i+1] = ids - } - } - - return &spec.LexicalSpec{ - InitialModeID: spec.LexModeIDDefault, - ModeNames: modeNames, - KindNames: kindNames, - KindIDs: kindIDs, - CompressionLevel: compLv, - Specs: modeSpecs, - }, nil, nil -} - -func groupEntriesByLexMode(entries []*LexEntry) ([][]*LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[spec.LexKindName]*LexEntry) { - modeNames := []spec.LexModeName{ - spec.LexModeNameNil, - spec.LexModeNameDefault, - } - modeName2ID := map[spec.LexModeName]spec.LexModeID{ - spec.LexModeNameNil: spec.LexModeIDNil, - spec.LexModeNameDefault: spec.LexModeIDDefault, - } - lastModeID := spec.LexModeIDDefault - modeEntries := [][]*LexEntry{ - nil, - {}, - } - fragments := map[spec.LexKindName]*LexEntry{} - for _, e := range entries { - if e.Fragment { - fragments[e.Kind] = e - continue - } - ms := e.Modes - if len(ms) == 0 { - ms = []spec.LexModeName{ - spec.LexModeNameDefault, - } - } - for _, modeName := range ms { - modeID, ok := modeName2ID[modeName] - if !ok { - modeID = lastModeID + 1 - lastModeID = modeID - modeName2ID[modeName] = modeID - modeNames = append(modeNames, modeName) - modeEntries = append(modeEntries, []*LexEntry{}) - } - modeEntries[modeID] = append(modeEntries[modeID], e) - } - } - return modeEntries, modeNames, modeName2ID, fragments -} - -func compile( - entries []*LexEntry, - modeName2ID map[spec.LexModeName]spec.LexModeID, - fragments map[spec.LexKindName]*LexEntry, - compLv int, -) (*spec.CompiledLexModeSpec, error, []*CompileError) { - var kindNames []spec.LexKindName - kindIDToName := map[spec.LexModeKindID]spec.LexKindName{} - var patterns map[spec.LexModeKindID][]byte - { - kindNames = append(kindNames, spec.LexKindNameNil) - patterns = map[spec.LexModeKindID][]byte{} - for i, e := range entries { - kindID := spec.LexModeKindID(i + 1) - - kindNames = append(kindNames, e.Kind) - kindIDToName[kindID] 
= e.Kind - patterns[kindID] = []byte(e.Pattern) - } - } - - push := []spec.LexModeID{ - spec.LexModeIDNil, - } - pop := []int{ - 0, - } - for _, e := range entries { - pushV := spec.LexModeIDNil - if e.Push != "" { - pushV = modeName2ID[e.Push] - } - push = append(push, pushV) - popV := 0 - if e.Pop { - popV = 1 - } - pop = append(pop, popV) - } - - fragmentPatterns := map[spec.LexKindName][]byte{} - for k, e := range fragments { - fragmentPatterns[k] = []byte(e.Pattern) - } - - fragmentCPTrees := make(map[spec.LexKindName]psr.CPTree, len(fragmentPatterns)) - { - var cerrs []*CompileError - for kind, pat := range fragmentPatterns { - p := psr.NewParser(kind, bytes.NewReader(pat)) - t, err := p.Parse() - if err != nil { - if err == psr.ParseErr { - detail, cause := p.Error() - cerrs = append(cerrs, &CompileError{ - Kind: kind, - Fragment: true, - Cause: cause, - Detail: detail, - }) - } else { - cerrs = append(cerrs, &CompileError{ - Kind: kind, - Fragment: true, - Cause: err, - }) - } - continue - } - fragmentCPTrees[kind] = t - } - if len(cerrs) > 0 { - return nil, fmt.Errorf("compile error"), cerrs - } - - err := psr.CompleteFragments(fragmentCPTrees) - if err != nil { - if err == psr.ParseErr { - for _, frag := range fragmentCPTrees { - kind, frags, err := frag.Describe() - if err != nil { - return nil, err, nil - } - - cerrs = append(cerrs, &CompileError{ - Kind: kind, - Fragment: true, - Cause: fmt.Errorf("fragment contains undefined fragments or cycles"), - Detail: fmt.Sprintf("%v", frags), - }) - } - - return nil, fmt.Errorf("compile error"), cerrs - } - - return nil, err, nil - } - } - - cpTrees := map[spec.LexModeKindID]psr.CPTree{} - { - pats := make([]*psr.PatternEntry, len(patterns)+1) - pats[spec.LexModeKindIDNil] = &psr.PatternEntry{ - ID: spec.LexModeKindIDNil, - } - for id, pattern := range patterns { - pats[id] = &psr.PatternEntry{ - ID: id, - Pattern: pattern, - } - } - - var cerrs []*CompileError - for _, pat := range pats { - if pat.ID == 
spec.LexModeKindIDNil { - continue - } - - p := psr.NewParser(kindIDToName[pat.ID], bytes.NewReader(pat.Pattern)) - t, err := p.Parse() - if err != nil { - if err == psr.ParseErr { - detail, cause := p.Error() - cerrs = append(cerrs, &CompileError{ - Kind: kindIDToName[pat.ID], - Fragment: false, - Cause: cause, - Detail: detail, - }) - } else { - cerrs = append(cerrs, &CompileError{ - Kind: kindIDToName[pat.ID], - Fragment: false, - Cause: err, - }) - } - continue - } - - complete, err := psr.ApplyFragments(t, fragmentCPTrees) - if err != nil { - return nil, err, nil - } - if !complete { - _, frags, err := t.Describe() - if err != nil { - return nil, err, nil - } - - cerrs = append(cerrs, &CompileError{ - Kind: kindIDToName[pat.ID], - Fragment: false, - Cause: fmt.Errorf("pattern contains undefined fragments"), - Detail: fmt.Sprintf("%v", frags), - }) - continue - } - - cpTrees[pat.ID] = t - } - if len(cerrs) > 0 { - return nil, fmt.Errorf("compile error"), cerrs - } - } - - var tranTab *spec.TransitionTable - { - root, symTab, err := dfa.ConvertCPTreeToByteTree(cpTrees) - if err != nil { - return nil, err, nil - } - d := dfa.GenDFA(root, symTab) - tranTab, err = dfa.GenTransitionTable(d) - if err != nil { - return nil, err, nil - } - } - - var err error - switch compLv { - case 2: - tranTab, err = compressTransitionTableLv2(tranTab) - if err != nil { - return nil, err, nil - } - case 1: - tranTab, err = compressTransitionTableLv1(tranTab) - if err != nil { - return nil, err, nil - } - } - - return &spec.CompiledLexModeSpec{ - KindNames: kindNames, - Push: push, - Pop: pop, - DFA: tranTab, - }, nil, nil -} - -const ( - CompressionLevelMin = 0 - CompressionLevelMax = 2 -) - -func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { - ueTab := compressor.NewUniqueEntriesTable() - { - orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount) - if err != nil { - 
return nil, err - } - err = ueTab.Compress(orig) - if err != nil { - return nil, err - } - } - - rdTab := compressor.NewRowDisplacementTable(0) - { - orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount) - if err != nil { - return nil, err - } - err = rdTab.Compress(orig) - if err != nil { - return nil, err - } - } - - tranTab.Transition = &spec.UniqueEntriesTable{ - UniqueEntries: &spec.RowDisplacementTable{ - OriginalRowCount: rdTab.OriginalRowCount, - OriginalColCount: rdTab.OriginalColCount, - EmptyValue: spec.StateIDNil, - Entries: convertIntSliceToStateIDSlice(rdTab.Entries), - Bounds: rdTab.Bounds, - RowDisplacement: rdTab.RowDisplacement, - }, - RowNums: ueTab.RowNums, - OriginalRowCount: ueTab.OriginalRowCount, - OriginalColCount: ueTab.OriginalColCount, - } - tranTab.UncompressedTransition = nil - - return tranTab, nil -} - -func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { - ueTab := compressor.NewUniqueEntriesTable() - { - orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount) - if err != nil { - return nil, err - } - err = ueTab.Compress(orig) - if err != nil { - return nil, err - } - } - - tranTab.Transition = &spec.UniqueEntriesTable{ - UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries), - RowNums: ueTab.RowNums, - OriginalRowCount: ueTab.OriginalRowCount, - OriginalColCount: ueTab.OriginalColCount, - } - tranTab.UncompressedTransition = nil - - return tranTab, nil -} - -func convertStateIDSliceToIntSlice(s []spec.StateID) []int { - is := make([]int, len(s)) - for i, v := range s { - is[i] = v.Int() - } - return is -} - -func convertIntSliceToStateIDSlice(s []int) []spec.StateID { - ss := make([]spec.StateID, len(s)) - for i, v := range s { - ss[i] = spec.StateID(v) - } - return ss -} diff --git a/grammar/lexical/compiler_test.go b/grammar/lexical/compiler_test.go deleted file mode 
100644 index 3336048..0000000 --- a/grammar/lexical/compiler_test.go +++ /dev/null @@ -1,338 +0,0 @@ -package lexical - -import ( - "encoding/json" - "fmt" - "testing" - - spec "spec/grammar" -) - -func TestLexSpec_Validate(t *testing.T) { - // We expect that the spelling inconsistency error will occur. - spec := &LexSpec{ - Entries: []*LexEntry{ - { - Modes: []spec.LexModeName{ - // 'Default' is the spelling inconsistency because 'default' is predefined. - "Default", - }, - Kind: "foo", - Pattern: "foo", - }, - }, - } - err := spec.Validate() - if err == nil { - t.Fatalf("expected error didn't occur") - } -} - -func TestSnakeCaseToUpperCamelCase(t *testing.T) { - tests := []struct { - snake string - camel string - }{ - { - snake: "foo", - camel: "Foo", - }, - { - snake: "foo_bar", - camel: "FooBar", - }, - { - snake: "foo_bar_baz", - camel: "FooBarBaz", - }, - { - snake: "Foo", - camel: "Foo", - }, - { - snake: "fooBar", - camel: "FooBar", - }, - { - snake: "FOO", - camel: "FOO", - }, - { - snake: "FOO_BAR", - camel: "FOOBAR", - }, - { - snake: "_foo_bar_", - camel: "FooBar", - }, - { - snake: "___foo___bar___", - camel: "FooBar", - }, - } - for _, tt := range tests { - c := SnakeCaseToUpperCamelCase(tt.snake) - if c != tt.camel { - t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c) - } - } -} - -func TestFindSpellingInconsistencies(t *testing.T) { - tests := []struct { - ids []string - duplicated [][]string - }{ - { - ids: []string{"foo", "foo"}, - duplicated: nil, - }, - { - ids: []string{"foo", "Foo"}, - duplicated: [][]string{{"Foo", "foo"}}, - }, - { - ids: []string{"foo", "foo", "Foo"}, - duplicated: [][]string{{"Foo", "foo"}}, - }, - { - ids: []string{"foo_bar_baz", "FooBarBaz"}, - duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}}, - }, - { - ids: []string{"foo", "Foo", "bar", "Bar"}, - duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}}, - }, - { - ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"}, - duplicated: [][]string{{"Bar", 
"bar"}, {"Foo", "foo"}}, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - duplicated := FindSpellingInconsistencies(tt.ids) - if len(duplicated) != len(tt.duplicated) { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated) - } - for i, dupIDs := range duplicated { - if len(dupIDs) != len(tt.duplicated[i]) { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) - } - for j, id := range dupIDs { - if id != tt.duplicated[i][j] { - t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs) - } - } - } - }) - } -} - -func TestCompile(t *testing.T) { - tests := []struct { - Caption string - Spec string - Err bool - }{ - { - Caption: "allow duplicates names between fragments and non-fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "\\f{a2z}" - }, - { - "fragment": true, - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - }, - { - Caption: "don't allow duplicates names in non-fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" - }, - { - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow duplicates names in fragments", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a2z", - "pattern": "\\f{a2z}" - }, - { - "fragments": true, - "kind": "a2z", - "pattern": "a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z" - }, - { - "fragments": true, - "kind": "a2z", - "pattern": "[a-z]" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow kind names in the same mode to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow kind names across modes to contain spelling inconsistencies", - Spec: ` -{ - "name": 
"test", - "entries": [ - { - "modes": ["default"], - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "modes": ["other_mode"], - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - Err: true, - }, - { - Caption: "don't allow mode names to contain spelling inconsistencies", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["foo_1"], - "kind": "a", - "pattern": "a" - }, - { - "modes": ["foo1"], - "kind": "b", - "pattern": "b" - } - ] -} -`, - Err: true, - }, - { - Caption: "allow fragment names in the same mode to contain spelling inconsistencies because fragments will not appear in output files", - Spec: ` -{ - "name": "test", - "entries": [ - { - "kind": "a", - "pattern": "a" - }, - { - "fragment": true, - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "fragment": true, - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - }, - { - Caption: "allow fragment names across modes to contain spelling inconsistencies because fragments will not appear in output files", - Spec: ` -{ - "name": "test", - "entries": [ - { - "modes": ["default"], - "kind": "a", - "pattern": "a" - }, - { - "modes": ["default"], - "fragment": true, - "kind": "foo_1", - "pattern": "foo_1" - }, - { - "modes": ["other_mode"], - "fragment": true, - "kind": "foo1", - "pattern": "foo1" - } - ] -} -`, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %s", i, tt.Caption), func(t *testing.T) { - lspec := &LexSpec{} - err := json.Unmarshal([]byte(tt.Spec), lspec) - if err != nil { - t.Fatalf("%v", err) - } - clspec, err, _ := Compile(lspec, CompressionLevelMin) - if tt.Err { - if err == nil { - t.Fatalf("expected an error") - } - if clspec != nil { - t.Fatalf("Compile function mustn't return a compiled specification") - } - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if clspec == nil { - t.Fatalf("Compile function must return a compiled specification") - } - } - }) - } -} diff --git a/grammar/lexical/dfa/dfa.go b/grammar/lexical/dfa/dfa.go deleted 
file mode 100644 index 884b168..0000000 --- a/grammar/lexical/dfa/dfa.go +++ /dev/null @@ -1,173 +0,0 @@ -package dfa - -import ( - "sort" - - spec "spec/grammar" -) - -type symbolTable struct { - symPos2Byte map[symbolPosition]byteRange - endPos2ID map[symbolPosition]spec.LexModeKindID -} - -func genSymbolTable(root byteTree) *symbolTable { - symTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{}, - endPos2ID: map[symbolPosition]spec.LexModeKindID{}, - } - return genSymTab(symTab, root) -} - -func genSymTab(symTab *symbolTable, node byteTree) *symbolTable { - if node == nil { - return symTab - } - - switch n := node.(type) { - case *symbolNode: - symTab.symPos2Byte[n.pos] = byteRange{ - from: n.from, - to: n.to, - } - case *endMarkerNode: - symTab.endPos2ID[n.pos] = n.id - default: - left, right := node.children() - genSymTab(symTab, left) - genSymTab(symTab, right) - } - return symTab -} - -type DFA struct { - States []string - InitialState string - AcceptingStatesTable map[string]spec.LexModeKindID - TransitionTable map[string][256]string -} - -func GenDFA(root byteTree, symTab *symbolTable) *DFA { - initialState := root.first() - initialStateHash := initialState.hash() - stateMap := map[string]*symbolPositionSet{ - initialStateHash: initialState, - } - tranTab := map[string][256]string{} - { - follow := genFollowTable(root) - unmarkedStates := map[string]*symbolPositionSet{ - initialStateHash: initialState, - } - for len(unmarkedStates) > 0 { - nextUnmarkedStates := map[string]*symbolPositionSet{} - for hash, state := range unmarkedStates { - tranTabOfState := [256]*symbolPositionSet{} - for _, pos := range state.set() { - if pos.isEndMark() { - continue - } - valRange := symTab.symPos2Byte[pos] - for symVal := valRange.from; symVal <= valRange.to; symVal++ { - if tranTabOfState[symVal] == nil { - tranTabOfState[symVal] = newSymbolPositionSet() - } - tranTabOfState[symVal].merge(follow[pos]) - } - } - for _, t := range tranTabOfState { - if t == 
nil { - continue - } - h := t.hash() - if _, ok := stateMap[h]; ok { - continue - } - stateMap[h] = t - nextUnmarkedStates[h] = t - } - tabOfState := [256]string{} - for v, t := range tranTabOfState { - if t == nil { - continue - } - tabOfState[v] = t.hash() - } - tranTab[hash] = tabOfState - } - unmarkedStates = nextUnmarkedStates - } - } - - accTab := map[string]spec.LexModeKindID{} - { - for h, s := range stateMap { - for _, pos := range s.set() { - if !pos.isEndMark() { - continue - } - priorID, ok := accTab[h] - if !ok { - accTab[h] = symTab.endPos2ID[pos] - } else { - id := symTab.endPos2ID[pos] - if id < priorID { - accTab[h] = id - } - } - } - } - } - - var states []string - { - for s := range stateMap { - states = append(states, s) - } - sort.Slice(states, func(i, j int) bool { - return states[i] < states[j] - }) - } - - return &DFA{ - States: states, - InitialState: initialStateHash, - AcceptingStatesTable: accTab, - TransitionTable: tranTab, - } -} - -func GenTransitionTable(dfa *DFA) (*spec.TransitionTable, error) { - stateHash2ID := map[string]spec.StateID{} - for i, s := range dfa.States { - // Since 0 represents an invalid value in a transition table, - // assign a number greater than or equal to 1 to states. 
- stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int()) - } - - acc := make([]spec.LexModeKindID, len(dfa.States)+1) - for _, s := range dfa.States { - id, ok := dfa.AcceptingStatesTable[s] - if !ok { - continue - } - acc[stateHash2ID[s]] = id - } - - rowCount := len(dfa.States) + 1 - colCount := 256 - tran := make([]spec.StateID, rowCount*colCount) - for s, tab := range dfa.TransitionTable { - for v, to := range tab { - tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to] - } - } - - return &spec.TransitionTable{ - InitialStateID: stateHash2ID[dfa.InitialState], - AcceptingStates: acc, - UncompressedTransition: tran, - RowCount: rowCount, - ColCount: colCount, - }, nil -} diff --git a/grammar/lexical/dfa/dfa_test.go b/grammar/lexical/dfa/dfa_test.go deleted file mode 100644 index 9af9aeb..0000000 --- a/grammar/lexical/dfa/dfa_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package dfa - -import ( - "strings" - "testing" - - "grammar/lexical/parser" - spec "spec/grammar" -) - -func TestGenDFA(t *testing.T) { - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - dfa := GenDFA(bt, symTab) - if dfa == nil { - t.Fatalf("DFA is nil") - } - - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) - s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) - s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) - s3 := 
newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) - - rune2Int := func(char rune, index int) uint8 { - return uint8([]byte(string(char))[index]) - } - - tranS0 := [256]string{} - tranS0[rune2Int('a', 0)] = s1.hash() - tranS0[rune2Int('b', 0)] = s0.hash() - - tranS1 := [256]string{} - tranS1[rune2Int('a', 0)] = s1.hash() - tranS1[rune2Int('b', 0)] = s2.hash() - - tranS2 := [256]string{} - tranS2[rune2Int('a', 0)] = s1.hash() - tranS2[rune2Int('b', 0)] = s3.hash() - - tranS3 := [256]string{} - tranS3[rune2Int('a', 0)] = s1.hash() - tranS3[rune2Int('b', 0)] = s0.hash() - - expectedTranTab := map[string][256]string{ - s0.hash(): tranS0, - s1.hash(): tranS1, - s2.hash(): tranS2, - s3.hash(): tranS3, - } - if len(dfa.TransitionTable) != len(expectedTranTab) { - t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) - } - for h, eTranTab := range expectedTranTab { - tranTab, ok := dfa.TransitionTable[h] - if !ok { - t.Errorf("no entry; hash: %v", h) - continue - } - if len(tranTab) != len(eTranTab) { - t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) - } - for c, eNext := range eTranTab { - if eNext == "" { - continue - } - - next := tranTab[c] - if next == "" { - t.Errorf("no enatry: hash: %v, char: %v", h, c) - } - if next != eNext { - t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) - } - } - } - - if dfa.InitialState != s0.hash() { - t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) - } - - accTab := map[string]spec.LexModeKindID{ - s3.hash(): 1, - } - if len(dfa.AcceptingStatesTable) != len(accTab) { - t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) - } - for eState, eID := range accTab { - id, ok := dfa.AcceptingStatesTable[eState] - if !ok { - t.Errorf("accepting 
// symbolPosition identifies a leaf of a byte tree. The low 15 bits hold the
// position number; the most significant bit is set when the leaf is an end
// marker rather than a symbol.
type symbolPosition uint16

const (
	symbolPositionNil symbolPosition = 0x0000

	symbolPositionMin uint16 = 0x0001
	symbolPositionMax uint16 = 0x7fff

	symbolPositionMaskSymbol  uint16 = 0x0000
	symbolPositionMaskEndMark uint16 = 0x8000

	symbolPositionMaskValue uint16 = 0x7fff
)

// newSymbolPosition encodes the position number n, flagged as an end marker
// when endMark is true. It returns symbolPositionNil and an error when n is
// outside [symbolPositionMin, symbolPositionMax].
func newSymbolPosition(n uint16, endMark bool) (symbolPosition, error) {
	if n < symbolPositionMin || n > symbolPositionMax {
		return symbolPositionNil, fmt.Errorf("symbol position must be within %v to %v: n: %v, endMark: %v", symbolPositionMin, symbolPositionMax, n, endMark)
	}
	if endMark {
		return symbolPosition(n | symbolPositionMaskEndMark), nil
	}
	return symbolPosition(n | symbolPositionMaskSymbol), nil
}

// String renders the position as "end#N" for end markers and "sym#N" otherwise.
func (p symbolPosition) String() string {
	if p.isEndMark() {
		return fmt.Sprintf("end#%v", uint16(p)&symbolPositionMaskValue)
	}
	return fmt.Sprintf("sym#%v", uint16(p)&symbolPositionMaskValue)
}

// isEndMark reports whether the end-marker bit is set.
func (p symbolPosition) isEndMark() bool {
	return uint16(p)&symbolPositionMaskEndMark != 0
}

// describe splits the position into its number and its end-marker flag.
func (p symbolPosition) describe() (uint16, bool) {
	return uint16(p) & symbolPositionMaskValue, p.isEndMark()
}

type symbolPositionSet struct {
	// `s` represents a set of symbol positions.
	// However, immediately after adding a symbol position, the elements may be duplicated.
	// When you need a sorted set with no duplicates, you can get such a value via the set function.
	s      []symbolPosition
	sorted bool
}

// newSymbolPositionSet returns an empty set.
func newSymbolPositionSet() *symbolPositionSet {
	return &symbolPositionSet{
		s:      []symbolPosition{},
		sorted: false,
	}
}

// String renders the sorted, de-duplicated elements as "{a, b, ...}".
func (s *symbolPositionSet) String() string {
	if len(s.s) == 0 {
		return "{}"
	}
	ps := s.sortAndRemoveDuplicates()
	var b strings.Builder
	fmt.Fprintf(&b, "{")
	for i, p := range ps {
		if i == 0 {
			fmt.Fprintf(&b, "%v", p)
			continue
		}
		fmt.Fprintf(&b, ", %v", p)
	}
	fmt.Fprintf(&b, "}")
	return b.String()
}

// set returns the elements in ascending order without duplicates. The
// returned slice is the set's own storage, not a copy.
func (s *symbolPositionSet) set() []symbolPosition {
	return s.sortAndRemoveDuplicates()
}

// add appends pos and returns the receiver so calls can be chained.
func (s *symbolPositionSet) add(pos symbolPosition) *symbolPositionSet {
	s.s = append(s.s, pos)
	s.sorted = false
	return s
}

// merge appends every element of t and returns the receiver. A nil t is
// treated as an empty set.
func (s *symbolPositionSet) merge(t *symbolPositionSet) *symbolPositionSet {
	if t == nil {
		return s
	}
	s.s = append(s.s, t.s...)
	s.sorted = false
	return s
}

// hash returns a string that is equal for two sets exactly when they contain
// the same positions. An empty set hashes to "".
func (s *symbolPositionSet) hash() string {
	if len(s.s) == 0 {
		return ""
	}
	sorted := s.sortAndRemoveDuplicates()
	buf := make([]byte, 0, 8*len(sorted))
	for _, p := range sorted {
		// A fresh zeroed 8-byte cell per element: PutUvarint writes a
		// variable number of bytes, and the rest must stay zero.
		var b [8]byte
		binary.PutUvarint(b[:], uint64(p))
		buf = append(buf, b[:]...)
	}
	// Convert to a string to be able to use it as a key of a map.
	// But note this byte sequence is made from values of symbol positions,
	// so this is not a well-formed UTF-8 sequence.
	return string(buf)
}

// sortAndRemoveDuplicates sorts the backing slice in place, drops repeated
// values and marks the set as sorted. It is a no-op on an already sorted set
// and safe to call on an empty one.
func (s *symbolPositionSet) sortAndRemoveDuplicates() []symbolPosition {
	if s.sorted {
		return s.s
	}
	if len(s.s) == 0 {
		// Nothing to sort; without this guard s.s[0] below would panic.
		s.sorted = true
		return s.s
	}

	sortSymbolPositions(s.s, 0, len(s.s)-1)

	// Remove duplicates.
	lastV := s.s[0]
	nextIdx := 1
	for _, v := range s.s[1:] {
		if v == lastV {
			continue
		}
		s.s[nextIdx] = v
		nextIdx++
		lastV = v
	}
	s.s = s.s[:nextIdx]
	s.sorted = true

	return s.s
}

// sortSymbolPositions sorts ps[left..right] (both inclusive) in ascending
// order using quick sort.
func sortSymbolPositions(ps []symbolPosition, left, right int) {
	if left >= right {
		return
	}
	var pivot symbolPosition
	{
		// Use the median of the first, middle and last elements as the pivot.
		p1 := ps[left]
		p2 := ps[(left+right)/2]
		p3 := ps[right]
		if p1 > p2 {
			p1, p2 = p2, p1
		}
		if p2 > p3 {
			p2 = p3
			if p1 > p2 {
				p2 = p1
			}
		}
		pivot = p2
	}
	i := left
	j := right
	for i <= j {
		for ps[i] < pivot {
			i++
		}
		for ps[j] > pivot {
			j--
		}
		if i <= j {
			ps[i], ps[j] = ps[j], ps[i]
			i++
			j--
		}
	}
	sortSymbolPositions(ps, left, j)
	sortSymbolPositions(ps, i, right)
}
a/grammar/lexical/dfa/tree.go +++ /dev/null @@ -1,567 +0,0 @@ -package dfa - -import ( - "fmt" - "io" - "sort" - - "grammar/lexical/parser" - spec "spec/grammar" - "utf8" -) - -type byteTree interface { - fmt.Stringer - children() (byteTree, byteTree) - nullable() bool - first() *symbolPositionSet - last() *symbolPositionSet - clone() byteTree -} - -var ( - _ byteTree = &symbolNode{} - _ byteTree = &endMarkerNode{} - _ byteTree = &concatNode{} - _ byteTree = &altNode{} - _ byteTree = &repeatNode{} - _ byteTree = &optionNode{} -) - -type byteRange struct { - from byte - to byte -} - -type symbolNode struct { - byteRange - pos symbolPosition - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newSymbolNode(value byte) *symbolNode { - return &symbolNode{ - byteRange: byteRange{ - from: value, - to: value, - }, - pos: symbolPositionNil, - } -} - -func newRangeSymbolNode(from, to byte) *symbolNode { - return &symbolNode{ - byteRange: byteRange{ - from: from, - to: to, - }, - pos: symbolPositionNil, - } -} - -func (n *symbolNode) String() string { - return fmt.Sprintf("symbol: value: %v-%v, pos: %v", n.from, n.to, n.pos) -} - -func (n *symbolNode) children() (byteTree, byteTree) { - return nil, nil -} - -func (n *symbolNode) nullable() bool { - return false -} - -func (n *symbolNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.add(n.pos) - } - return n.firstMemo -} - -func (n *symbolNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.add(n.pos) - } - return n.lastMemo -} - -func (n *symbolNode) clone() byteTree { - return newRangeSymbolNode(n.from, n.to) -} - -type endMarkerNode struct { - id spec.LexModeKindID - pos symbolPosition - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode { - return &endMarkerNode{ - id: id, - pos: symbolPositionNil, - } -} - 
-func (n *endMarkerNode) String() string { - return fmt.Sprintf("end: pos: %v", n.pos) -} - -func (n *endMarkerNode) children() (byteTree, byteTree) { - return nil, nil -} - -func (n *endMarkerNode) nullable() bool { - return false -} - -func (n *endMarkerNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.add(n.pos) - } - return n.firstMemo -} - -func (n *endMarkerNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.add(n.pos) - } - return n.lastMemo -} - -func (n *endMarkerNode) clone() byteTree { - return newEndMarkerNode(n.id) -} - -type concatNode struct { - left byteTree - right byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newConcatNode(left, right byteTree) *concatNode { - return &concatNode{ - left: left, - right: right, - } -} - -func (n *concatNode) String() string { - return "concat" -} - -func (n *concatNode) children() (byteTree, byteTree) { - return n.left, n.right -} - -func (n *concatNode) nullable() bool { - return n.left.nullable() && n.right.nullable() -} - -func (n *concatNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - if n.left.nullable() { - n.firstMemo.merge(n.right.first()) - } - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *concatNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.right.last()) - if n.right.nullable() { - n.lastMemo.merge(n.left.last()) - } - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *concatNode) clone() byteTree { - return newConcatNode(n.left.clone(), n.right.clone()) -} - -type altNode struct { - left byteTree - right byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newAltNode(left, right byteTree) *altNode { - return 
&altNode{ - left: left, - right: right, - } -} - -func (n *altNode) String() string { - return "alt" -} - -func (n *altNode) children() (byteTree, byteTree) { - return n.left, n.right -} - -func (n *altNode) nullable() bool { - return n.left.nullable() || n.right.nullable() -} - -func (n *altNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - n.firstMemo.merge(n.right.first()) - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *altNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.left.last()) - n.lastMemo.merge(n.right.last()) - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *altNode) clone() byteTree { - return newAltNode(n.left.clone(), n.right.clone()) -} - -type repeatNode struct { - left byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newRepeatNode(left byteTree) *repeatNode { - return &repeatNode{ - left: left, - } -} - -func (n *repeatNode) String() string { - return "repeat" -} - -func (n *repeatNode) children() (byteTree, byteTree) { - return n.left, nil -} - -func (n *repeatNode) nullable() bool { - return true -} - -func (n *repeatNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *repeatNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.left.last()) - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *repeatNode) clone() byteTree { - return newRepeatNode(n.left.clone()) -} - -type optionNode struct { - left byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newOptionNode(left byteTree) *optionNode { - return &optionNode{ - left: left, 
- } -} - -func (n *optionNode) String() string { - return "option" -} - -func (n *optionNode) children() (byteTree, byteTree) { - return n.left, nil -} - -func (n *optionNode) nullable() bool { - return true -} - -func (n *optionNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *optionNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.left.last()) - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *optionNode) clone() byteTree { - return newOptionNode(n.left.clone()) -} - -type followTable map[symbolPosition]*symbolPositionSet - -func genFollowTable(root byteTree) followTable { - follow := followTable{} - calcFollow(follow, root) - return follow -} - -func calcFollow(follow followTable, ast byteTree) { - if ast == nil { - return - } - left, right := ast.children() - calcFollow(follow, left) - calcFollow(follow, right) - switch n := ast.(type) { - case *concatNode: - l, r := n.children() - for _, p := range l.last().set() { - if _, ok := follow[p]; !ok { - follow[p] = newSymbolPositionSet() - } - follow[p].merge(r.first()) - } - case *repeatNode: - for _, p := range n.last().set() { - if _, ok := follow[p]; !ok { - follow[p] = newSymbolPositionSet() - } - follow[p].merge(n.first()) - } - } -} - -func positionSymbols(node byteTree, n uint16) (uint16, error) { - if node == nil { - return n, nil - } - - l, r := node.children() - p := n - p, err := positionSymbols(l, p) - if err != nil { - return p, err - } - p, err = positionSymbols(r, p) - if err != nil { - return p, err - } - switch n := node.(type) { - case *symbolNode: - n.pos, err = newSymbolPosition(p, false) - if err != nil { - return p, err - } - p++ - case *endMarkerNode: - n.pos, err = newSymbolPosition(p, true) - if err != nil { - return p, err - } - p++ 
- } - node.first() - node.last() - return p, nil -} - -func concat(ts ...byteTree) byteTree { - nonNilNodes := []byteTree{} - for _, t := range ts { - if t == nil { - continue - } - nonNilNodes = append(nonNilNodes, t) - } - if len(nonNilNodes) <= 0 { - return nil - } - if len(nonNilNodes) == 1 { - return nonNilNodes[0] - } - concat := newConcatNode(nonNilNodes[0], nonNilNodes[1]) - for _, t := range nonNilNodes[2:] { - concat = newConcatNode(concat, t) - } - return concat -} - -func oneOf(ts ...byteTree) byteTree { - nonNilNodes := []byteTree{} - for _, t := range ts { - if t == nil { - continue - } - nonNilNodes = append(nonNilNodes, t) - } - if len(nonNilNodes) <= 0 { - return nil - } - if len(nonNilNodes) == 1 { - return nonNilNodes[0] - } - alt := newAltNode(nonNilNodes[0], nonNilNodes[1]) - for _, t := range nonNilNodes[2:] { - alt = newAltNode(alt, t) - } - return alt -} - -//nolint:unused -func printByteTree(w io.Writer, t byteTree, ruledLine string, childRuledLinePrefix string, withAttrs bool) { - if t == nil { - return - } - fmt.Fprintf(w, "%v%v", ruledLine, t) - if withAttrs { - fmt.Fprintf(w, ", nullable: %v, first: %v, last: %v", t.nullable(), t.first(), t.last()) - } - fmt.Fprintf(w, "\n") - left, right := t.children() - children := []byteTree{} - if left != nil { - children = append(children, left) - } - if right != nil { - children = append(children, right) - } - num := len(children) - for i, child := range children { - line := "└─ " - if num > 1 { - if i == 0 { - line = "├─ " - } else if i < num-1 { - line = "│ " - } - } - prefix := "│ " - if i >= num-1 { - prefix = " " - } - printByteTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix, withAttrs) - } -} - -func ConvertCPTreeToByteTree(cpTrees map[spec.LexModeKindID]parser.CPTree) (byteTree, *symbolTable, error) { - var ids []spec.LexModeKindID - for id := range cpTrees { - ids = append(ids, id) - } - sort.Slice(ids, func(i, j int) bool { - return ids[i] < ids[j] - }) - - var bt 
byteTree - for _, id := range ids { - cpTree := cpTrees[id] - t, err := convCPTreeToByteTree(cpTree) - if err != nil { - return nil, nil, err - } - bt = oneOf(bt, concat(t, newEndMarkerNode(id))) - } - _, err := positionSymbols(bt, symbolPositionMin) - if err != nil { - return nil, nil, err - } - - return bt, genSymbolTable(bt), nil -} - -func convCPTreeToByteTree(cpTree parser.CPTree) (byteTree, error) { - if from, to, ok := cpTree.Range(); ok { - bs, err := utf8.GenCharBlocks(from, to) - if err != nil { - return nil, err - } - var a byteTree - for _, b := range bs { - var c byteTree - for i := 0; i < len(b.From); i++ { - c = concat(c, newRangeSymbolNode(b.From[i], b.To[i])) - } - a = oneOf(a, c) - } - return a, nil - } - - if tree, ok := cpTree.Repeatable(); ok { - t, err := convCPTreeToByteTree(tree) - if err != nil { - return nil, err - } - return newRepeatNode(t), nil - } - - if tree, ok := cpTree.Optional(); ok { - t, err := convCPTreeToByteTree(tree) - if err != nil { - return nil, err - } - return newOptionNode(t), nil - } - - if left, right, ok := cpTree.Concatenation(); ok { - l, err := convCPTreeToByteTree(left) - if err != nil { - return nil, err - } - r, err := convCPTreeToByteTree(right) - if err != nil { - return nil, err - } - return newConcatNode(l, r), nil - } - - if left, right, ok := cpTree.Alternatives(); ok { - l, err := convCPTreeToByteTree(left) - if err != nil { - return nil, err - } - r, err := convCPTreeToByteTree(right) - if err != nil { - return nil, err - } - return newAltNode(l, r), nil - } - - return nil, fmt.Errorf("invalid tree type: %T", cpTree) -} diff --git a/grammar/lexical/dfa/tree_test.go b/grammar/lexical/dfa/tree_test.go deleted file mode 100644 index 188fe95..0000000 --- a/grammar/lexical/dfa/tree_test.go +++ /dev/null @@ -1,257 +0,0 @@ -package dfa - -import ( - "fmt" - "strings" - "testing" - - "grammar/lexical/parser" - spec "spec/grammar" -) - -func TestByteTree(t *testing.T) { - tests := []struct { - root byteTree - 
nullable bool - first *symbolPositionSet - last *symbolPositionSet - }{ - { - root: newSymbolNodeWithPos(0, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newEndMarkerNodeWithPos(1, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - 
nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newRepeatNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newOptionNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - if tt.root.nullable() != tt.nullable { - t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) - } - if tt.first.hash() != tt.root.first().hash() { - t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first()) - } - if tt.last.hash() != tt.root.last().hash() { - t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) - } - }) - } -} - -func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { - n := newSymbolNode(v) - n.pos = pos - return n -} - -func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { - n := newEndMarkerNode(spec.LexModeKindID(id)) - n.pos = pos - return n -} - -func TestFollowAndSymbolTable(t *testing.T) { - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - - { - followTab := genFollowTable(bt) - if followTab == nil { - t.Fatal("follow table is nil") - } - expectedFollowTab := 
followTable{ - 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 3: newSymbolPositionSet().add(symPos(4)), - 4: newSymbolPositionSet().add(symPos(5)), - 5: newSymbolPositionSet().add(endPos(6)), - } - testFollowTable(t, expectedFollowTab, followTab) - } - - { - entry := func(v byte) byteRange { - return byteRange{ - from: v, - to: v, - } - } - - expectedSymTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{ - symPos(1): entry(byte('a')), - symPos(2): entry(byte('b')), - symPos(3): entry(byte('a')), - symPos(4): entry(byte('b')), - symPos(5): entry(byte('b')), - }, - endPos2ID: map[symbolPosition]spec.LexModeKindID{ - endPos(6): 1, - }, - } - testSymbolTable(t, expectedSymTab, symTab) - } -} - -func testFollowTable(t *testing.T, expected, actual followTable) { - if len(actual) != len(expected) { - t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) - } - for ePos, eSet := range expected { - aSet, ok := actual[ePos] - if !ok { - t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) - } - if aSet.hash() != eSet.hash() { - t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet) - } - } -} - -func testSymbolTable(t *testing.T, expected, actual *symbolTable) { - t.Helper() - - if len(actual.symPos2Byte) != len(expected.symPos2Byte) { - t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) - } - for ePos, eByte := range expected.symPos2Byte { - byte, ok := actual.symPos2Byte[ePos] - if !ok { - t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) - continue - } - if byte.from != eByte.from || byte.to != eByte.to { - t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte) - } - } - - if len(actual.endPos2ID) != 
len(expected.endPos2ID) { - t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) - } - for ePos, eID := range expected.endPos2ID { - id, ok := actual.endPos2ID[ePos] - if !ok { - t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) - continue - } - if id != eID { - t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) - } - } -} diff --git a/grammar/lexical/entry.go b/grammar/lexical/entry.go deleted file mode 100644 index 6d2fbe3..0000000 --- a/grammar/lexical/entry.go +++ /dev/null @@ -1,171 +0,0 @@ -package lexical - -import ( - "fmt" - "sort" - "strings" - - spec "spec/grammar" -) - -type LexEntry struct { - Kind spec.LexKindName - Pattern string - Modes []spec.LexModeName - Push spec.LexModeName - Pop bool - Fragment bool -} - -type LexSpec struct { - Entries []*LexEntry -} - -func (s *LexSpec) Validate() error { - if len(s.Entries) <= 0 { - return fmt.Errorf("the lexical specification must have at least one entry") - } - { - ks := map[string]struct{}{} - fks := map[string]struct{}{} - for _, e := range s.Entries { - // Allow duplicate names between fragments and non-fragments. - if e.Fragment { - if _, exist := fks[e.Kind.String()]; exist { - return fmt.Errorf("kinds `%v` are duplicates", e.Kind) - } - fks[e.Kind.String()] = struct{}{} - } else { - if _, exist := ks[e.Kind.String()]; exist { - return fmt.Errorf("kinds `%v` are duplicates", e.Kind) - } - ks[e.Kind.String()] = struct{}{} - } - } - } - { - kinds := []string{} - modes := []string{ - spec.LexModeNameDefault.String(), // This is a predefined mode. 
- } - for _, e := range s.Entries { - if e.Fragment { - continue - } - - kinds = append(kinds, e.Kind.String()) - - for _, m := range e.Modes { - modes = append(modes, m.String()) - } - } - - kindErrs := findSpellingInconsistenciesErrors(kinds, nil) - modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error { - if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(spec.LexModeNameDefault.String()) { - var b strings.Builder - fmt.Fprintf(&b, "%+v", ids[0]) - for _, id := range ids[1:] { - fmt.Fprintf(&b, ", %+v", id) - } - return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", spec.LexModeNameDefault, b.String()) - } - return nil - }) - errs := append(kindErrs, modeErrs...) - if len(errs) > 0 { - var b strings.Builder - fmt.Fprintf(&b, "%v", errs[0]) - for _, err := range errs[1:] { - fmt.Fprintf(&b, "\n%v", err) - } - return fmt.Errorf(b.String()) - } - } - - return nil -} - -func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error { - duplicated := FindSpellingInconsistencies(ids) - if len(duplicated) == 0 { - return nil - } - - var errs []error - for _, dup := range duplicated { - if hook != nil { - err := hook(dup) - if err != nil { - errs = append(errs, err) - continue - } - } - - var b strings.Builder - fmt.Fprintf(&b, "%+v", dup[0]) - for _, id := range dup[1:] { - fmt.Fprintf(&b, ", %+v", id) - } - err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String()) - errs = append(errs, err) - } - - return errs -} - -// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same -// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same -// in UpperCamelCase. Thus they are considere to be spelling inconsistency. 
// FindSpellingInconsistencies finds spelling inconsistencies in identifiers.
// Identifiers are considered to be the same if they are spelled the same when
// expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are
// spelled the same in UpperCamelCase, so they are considered to be a spelling
// inconsistency.
//
// The result has one group per UpperCamelCase spelling that is shared by two
// or more distinct identifiers. Each group is sorted in ascending order and
// the groups are sorted by their first element. It returns nil when there is
// no inconsistency.
func FindSpellingInconsistencies(ids []string) [][]string {
	groups := map[string][]string{}
	for _, id := range removeDuplicates(ids) {
		camel := SnakeCaseToUpperCamelCase(id)
		groups[camel] = append(groups[camel], id)
	}

	var duplicated [][]string
	for _, spellings := range groups {
		// A single spelling is consistent with itself.
		if len(spellings) < 2 {
			continue
		}
		sort.Strings(spellings)
		duplicated = append(duplicated, spellings)
	}
	// Map iteration order is random; sort the groups for a stable result.
	sort.Slice(duplicated, func(i, j int) bool {
		return duplicated[i][0] < duplicated[j][0]
	})

	return duplicated
}

// removeDuplicates returns the distinct values of s in order of first
// appearance. It returns nil when s is empty.
func removeDuplicates(s []string) []string {
	seen := make(map[string]struct{}, len(s))

	var unique []string
	for _, v := range s {
		if _, ok := seen[v]; ok {
			continue
		}
		seen[v] = struct{}{}
		unique = append(unique, v)
	}

	return unique
}

// SnakeCaseToUpperCamelCase converts a snake_case identifier to
// UpperCamelCase: it splits snake at every `_`, upper-cases the first
// character of each non-empty element, and concatenates the elements. The rest
// of each element is left untouched, so an identifier that is already in
// UpperCamelCase is returned unchanged.
func SnakeCaseToUpperCamelCase(snake string) string {
	elems := strings.Split(snake, "_")
	for i, e := range elems {
		if e == "" {
			continue
		}
		// Determine the byte width of the first character so that a
		// multi-byte UTF-8 character is upper-cased as a whole rather than
		// being cut at its first byte. Ranging over a string yields the byte
		// offset of each character; the second offset is the first width.
		width := len(e)
		for pos := range e {
			if pos > 0 {
				width = pos
				break
			}
		}
		elems[i] = strings.ToUpper(e[:width]) + e[width:]
	}

	return strings.Join(elems, "")
}
synErrAltLackOfOperand = fmt.Errorf("an alternation expression must have operands") - synErrRepNoTarget = fmt.Errorf("a repeat expression must have an operand") - synErrGroupNoElem = fmt.Errorf("a grouping expression must include at least one character") - synErrGroupUnclosed = fmt.Errorf("unclosed grouping expression") - synErrGroupNoInitiator = fmt.Errorf(") needs preceding (") - synErrGroupInvalidForm = fmt.Errorf("invalid grouping expression") - synErrBExpNoElem = fmt.Errorf("a bracket expression must include at least one character") - synErrBExpUnclosed = fmt.Errorf("unclosed bracket expression") - synErrBExpInvalidForm = fmt.Errorf("invalid bracket expression") - synErrRangeInvalidOrder = fmt.Errorf("a range expression with invalid order") - synErrRangePropIsUnavailable = fmt.Errorf("a property expression is unavailable in a range expression") - synErrRangeInvalidForm = fmt.Errorf("invalid range expression") - synErrCPExpInvalidForm = fmt.Errorf("invalid code point expression") - synErrCPExpOutOfRange = fmt.Errorf("a code point must be between U+0000 to U+10FFFF") - synErrCharPropExpInvalidForm = fmt.Errorf("invalid character property expression") - synErrCharPropUnsupported = fmt.Errorf("unsupported character property") - synErrFragmentExpInvalidForm = fmt.Errorf("invalid fragment expression") -) diff --git a/grammar/lexical/parser/fragment.go b/grammar/lexical/parser/fragment.go deleted file mode 100644 index e51759e..0000000 --- a/grammar/lexical/parser/fragment.go +++ /dev/null @@ -1,72 +0,0 @@ -package parser - -import ( - "fmt" - - spec "spec/grammar" -) - -type incompleteFragment struct { - kind spec.LexKindName - root *rootNode -} - -func CompleteFragments(fragments map[spec.LexKindName]CPTree) error { - if len(fragments) == 0 { - return nil - } - - completeFragments := map[spec.LexKindName]CPTree{} - incompleteFragments := []*incompleteFragment{} - for kind, tree := range fragments { - root, ok := tree.(*rootNode) - if !ok { - return 
fmt.Errorf("CompleteFragments can take only *rootNode: %T", tree) - } - if root.incomplete() { - incompleteFragments = append(incompleteFragments, &incompleteFragment{ - kind: kind, - root: root, - }) - } else { - completeFragments[kind] = root - } - } - for len(incompleteFragments) > 0 { - lastIncompCount := len(incompleteFragments) - remainingFragments := []*incompleteFragment{} - for _, e := range incompleteFragments { - complete, err := ApplyFragments(e.root, completeFragments) - if err != nil { - return err - } - if !complete { - remainingFragments = append(remainingFragments, e) - } else { - completeFragments[e.kind] = e.root - } - } - incompleteFragments = remainingFragments - if len(incompleteFragments) == lastIncompCount { - return ParseErr - } - } - - return nil -} - -func ApplyFragments(t CPTree, fragments map[spec.LexKindName]CPTree) (bool, error) { - root, ok := t.(*rootNode) - if !ok { - return false, fmt.Errorf("ApplyFragments can take only *rootNode type: %T", t) - } - - for name, frag := range fragments { - err := root.applyFragment(name, frag) - if err != nil { - return false, err - } - } - - return !root.incomplete(), nil -} diff --git a/grammar/lexical/parser/lexer.go b/grammar/lexical/parser/lexer.go deleted file mode 100644 index 3861825..0000000 --- a/grammar/lexical/parser/lexer.go +++ /dev/null @@ -1,594 +0,0 @@ -package parser - -import ( - "bufio" - "fmt" - "io" - "strings" -) - -type tokenKind string - -const ( - tokenKindChar tokenKind = "char" - tokenKindAnyChar tokenKind = "." - tokenKindRepeat tokenKind = "*" - tokenKindRepeatOneOrMore tokenKind = "+" - tokenKindOption tokenKind = "?" 
- tokenKindAlt tokenKind = "|" - tokenKindGroupOpen tokenKind = "(" - tokenKindGroupClose tokenKind = ")" - tokenKindBExpOpen tokenKind = "[" - tokenKindInverseBExpOpen tokenKind = "[^" - tokenKindBExpClose tokenKind = "]" - tokenKindCharRange tokenKind = "-" - tokenKindCodePointLeader tokenKind = "\\u" - tokenKindCharPropLeader tokenKind = "\\p" - tokenKindFragmentLeader tokenKind = "\\f" - tokenKindLBrace tokenKind = "{" - tokenKindRBrace tokenKind = "}" - tokenKindEqual tokenKind = "=" - tokenKindCodePoint tokenKind = "code point" - tokenKindCharPropSymbol tokenKind = "character property symbol" - tokenKindFragmentSymbol tokenKind = "fragment symbol" - tokenKindEOF tokenKind = "eof" -) - -type token struct { - kind tokenKind - char rune - propSymbol string - codePoint string - fragmentSymbol string -} - -const nullChar = '\u0000' - -func newToken(kind tokenKind, char rune) *token { - return &token{ - kind: kind, - char: char, - } -} - -func newCodePointToken(codePoint string) *token { - return &token{ - kind: tokenKindCodePoint, - codePoint: codePoint, - } -} - -func newCharPropSymbolToken(propSymbol string) *token { - return &token{ - kind: tokenKindCharPropSymbol, - propSymbol: propSymbol, - } -} - -func newFragmentSymbolToken(fragmentSymbol string) *token { - return &token{ - kind: tokenKindFragmentSymbol, - fragmentSymbol: fragmentSymbol, - } -} - -type lexerMode string - -const ( - lexerModeDefault lexerMode = "default" - lexerModeBExp lexerMode = "bracket expression" - lexerModeCPExp lexerMode = "code point expression" - lexerModeCharPropExp lexerMode = "character property expression" - lexerModeFragmentExp lexerMode = "fragment expression" -) - -type lexerModeStack struct { - stack []lexerMode -} - -func newLexerModeStack() *lexerModeStack { - return &lexerModeStack{ - stack: []lexerMode{ - lexerModeDefault, - }, - } -} - -func (s *lexerModeStack) top() lexerMode { - return s.stack[len(s.stack)-1] -} - -func (s *lexerModeStack) push(m lexerMode) { - 
s.stack = append(s.stack, m) -} - -func (s *lexerModeStack) pop() { - s.stack = s.stack[:len(s.stack)-1] -} - -type rangeState string - -// [a-z] -// ^^^^ -// |||`-- ready -// ||`-- expect range terminator -// |`-- read range initiator -// `-- ready -const ( - rangeStateReady rangeState = "ready" - rangeStateReadRangeInitiator rangeState = "read range initiator" - rangeStateExpectRangeTerminator rangeState = "expect range terminator" -) - -type lexer struct { - src *bufio.Reader - peekChar2 rune - peekEOF2 bool - peekChar1 rune - peekEOF1 bool - lastChar rune - reachedEOF bool - prevChar1 rune - prevEOF1 bool - prevChar2 rune - pervEOF2 bool - modeStack *lexerModeStack - rangeState rangeState - - errCause error - errDetail string -} - -func newLexer(src io.Reader) *lexer { - return &lexer{ - src: bufio.NewReader(src), - peekChar2: nullChar, - peekEOF2: false, - peekChar1: nullChar, - peekEOF1: false, - lastChar: nullChar, - reachedEOF: false, - prevChar1: nullChar, - prevEOF1: false, - prevChar2: nullChar, - pervEOF2: false, - modeStack: newLexerModeStack(), - rangeState: rangeStateReady, - } -} - -func (l *lexer) error() (string, error) { - return l.errDetail, l.errCause -} - -func (l *lexer) next() (*token, error) { - c, eof, err := l.read() - if err != nil { - return nil, err - } - if eof { - return newToken(tokenKindEOF, nullChar), nil - } - - switch l.modeStack.top() { - case lexerModeBExp: - tok, err := l.nextInBExp(c) - if err != nil { - return nil, err - } - if tok.kind == tokenKindChar || tok.kind == tokenKindCodePointLeader || tok.kind == tokenKindCharPropLeader { - switch l.rangeState { - case rangeStateReady: - l.rangeState = rangeStateReadRangeInitiator - case rangeStateExpectRangeTerminator: - l.rangeState = rangeStateReady - } - } - switch tok.kind { - case tokenKindBExpClose: - l.modeStack.pop() - case tokenKindCharRange: - l.rangeState = rangeStateExpectRangeTerminator - case tokenKindCodePointLeader: - l.modeStack.push(lexerModeCPExp) - case 
tokenKindCharPropLeader: - l.modeStack.push(lexerModeCharPropExp) - } - return tok, nil - case lexerModeCPExp: - tok, err := l.nextInCodePoint(c) - if err != nil { - return nil, err - } - switch tok.kind { - case tokenKindRBrace: - l.modeStack.pop() - } - return tok, nil - case lexerModeCharPropExp: - tok, err := l.nextInCharProp(c) - if err != nil { - return nil, err - } - switch tok.kind { - case tokenKindRBrace: - l.modeStack.pop() - } - return tok, nil - case lexerModeFragmentExp: - tok, err := l.nextInFragment(c) - if err != nil { - return nil, err - } - switch tok.kind { - case tokenKindRBrace: - l.modeStack.pop() - } - return tok, nil - default: - tok, err := l.nextInDefault(c) - if err != nil { - return nil, err - } - switch tok.kind { - case tokenKindBExpOpen: - l.modeStack.push(lexerModeBExp) - l.rangeState = rangeStateReady - case tokenKindInverseBExpOpen: - l.modeStack.push(lexerModeBExp) - l.rangeState = rangeStateReady - case tokenKindCodePointLeader: - l.modeStack.push(lexerModeCPExp) - case tokenKindCharPropLeader: - l.modeStack.push(lexerModeCharPropExp) - case tokenKindFragmentLeader: - l.modeStack.push(lexerModeFragmentExp) - } - return tok, nil - } -} - -func (l *lexer) nextInDefault(c rune) (*token, error) { - switch c { - case '*': - return newToken(tokenKindRepeat, nullChar), nil - case '+': - return newToken(tokenKindRepeatOneOrMore, nullChar), nil - case '?': - return newToken(tokenKindOption, nullChar), nil - case '.': - return newToken(tokenKindAnyChar, nullChar), nil - case '|': - return newToken(tokenKindAlt, nullChar), nil - case '(': - return newToken(tokenKindGroupOpen, nullChar), nil - case ')': - return newToken(tokenKindGroupClose, nullChar), nil - case '[': - c1, eof, err := l.read() - if err != nil { - return nil, err - } - if eof { - err := l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindBExpOpen, nullChar), nil - } - if c1 != '^' { - err := l.restore() - if err != nil { - return nil, err - } - 
return newToken(tokenKindBExpOpen, nullChar), nil - } - c2, eof, err := l.read() - if err != nil { - return nil, err - } - if eof { - err := l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindInverseBExpOpen, nullChar), nil - } - if c2 != ']' { - err := l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindInverseBExpOpen, nullChar), nil - } - err = l.restore() - if err != nil { - return nil, err - } - err = l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindBExpOpen, nullChar), nil - case '\\': - c, eof, err := l.read() - if err != nil { - return nil, err - } - if eof { - l.errCause = synErrIncompletedEscSeq - return nil, ParseErr - } - if c == 'u' { - return newToken(tokenKindCodePointLeader, nullChar), nil - } - if c == 'p' { - return newToken(tokenKindCharPropLeader, nullChar), nil - } - if c == 'f' { - return newToken(tokenKindFragmentLeader, nullChar), nil - } - if c == '\\' || c == '.' || c == '*' || c == '+' || c == '?' 
|| c == '|' || c == '(' || c == ')' || c == '[' || c == ']' { - return newToken(tokenKindChar, c), nil - } - l.errCause = synErrInvalidEscSeq - l.errDetail = fmt.Sprintf("\\%v is not supported", string(c)) - return nil, ParseErr - default: - return newToken(tokenKindChar, c), nil - } -} - -func (l *lexer) nextInBExp(c rune) (*token, error) { - switch c { - case '-': - if l.rangeState != rangeStateReadRangeInitiator { - return newToken(tokenKindChar, c), nil - } - c1, eof, err := l.read() - if err != nil { - return nil, err - } - if eof { - err := l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindChar, c), nil - } - if c1 != ']' { - err := l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindCharRange, nullChar), nil - } - err = l.restore() - if err != nil { - return nil, err - } - return newToken(tokenKindChar, c), nil - case ']': - return newToken(tokenKindBExpClose, nullChar), nil - case '\\': - c, eof, err := l.read() - if err != nil { - return nil, err - } - if eof { - l.errCause = synErrIncompletedEscSeq - return nil, ParseErr - } - if c == 'u' { - return newToken(tokenKindCodePointLeader, nullChar), nil - } - if c == 'p' { - return newToken(tokenKindCharPropLeader, nullChar), nil - } - if c == '\\' || c == '^' || c == '-' || c == ']' { - return newToken(tokenKindChar, c), nil - } - l.errCause = synErrInvalidEscSeq - l.errDetail = fmt.Sprintf("\\%v is not supported in a bracket expression", string(c)) - return nil, ParseErr - default: - return newToken(tokenKindChar, c), nil - } -} - -func (l *lexer) nextInCodePoint(c rune) (*token, error) { - switch c { - case '{': - return newToken(tokenKindLBrace, nullChar), nil - case '}': - return newToken(tokenKindRBrace, nullChar), nil - default: - if !isHexDigit(c) { - l.errCause = synErrInvalidCodePoint - return nil, ParseErr - } - var b strings.Builder - fmt.Fprint(&b, string(c)) - n := 1 - for { - c, eof, err := l.read() - if err != nil { - return nil, err - } - 
// isHexDigit reports whether c is an ASCII hexadecimal digit, i.e. one of
// 0-9, A-F, or a-f. Letters beyond F/f are not hexadecimal digits and must be
// rejected so that a malformed code point is caught while lexing.
func isHexDigit(c rune) bool {
	switch {
	case c >= '0' && c <= '9':
		return true
	case c >= 'A' && c <= 'F':
		return true
	case c >= 'a' && c <= 'f':
		return true
	}
	return false
}
} - sym := strings.TrimSpace(b.String()) - if len(sym) == 0 { - l.errCause = SynErrFragmentInvalidSymbol - return nil, ParseErr - } - return newFragmentSymbolToken(sym), nil - } -} - -func (l *lexer) read() (rune, bool, error) { - if l.reachedEOF { - return l.lastChar, l.reachedEOF, nil - } - if l.peekChar1 != nullChar || l.peekEOF1 { - l.prevChar2 = l.prevChar1 - l.pervEOF2 = l.prevEOF1 - l.prevChar1 = l.lastChar - l.prevEOF1 = l.reachedEOF - l.lastChar = l.peekChar1 - l.reachedEOF = l.peekEOF1 - l.peekChar1 = l.peekChar2 - l.peekEOF1 = l.peekEOF2 - l.peekChar2 = nullChar - l.peekEOF2 = false - return l.lastChar, l.reachedEOF, nil - } - c, _, err := l.src.ReadRune() - if err != nil { - if err == io.EOF { - l.prevChar2 = l.prevChar1 - l.pervEOF2 = l.prevEOF1 - l.prevChar1 = l.lastChar - l.prevEOF1 = l.reachedEOF - l.lastChar = nullChar - l.reachedEOF = true - return l.lastChar, l.reachedEOF, nil - } - return nullChar, false, err - } - l.prevChar2 = l.prevChar1 - l.pervEOF2 = l.prevEOF1 - l.prevChar1 = l.lastChar - l.prevEOF1 = l.reachedEOF - l.lastChar = c - l.reachedEOF = false - return l.lastChar, l.reachedEOF, nil -} - -func (l *lexer) restore() error { - if l.lastChar == nullChar && !l.reachedEOF { - return fmt.Errorf("failed to call restore() because the last character is null") - } - l.peekChar2 = l.peekChar1 - l.peekEOF2 = l.peekEOF1 - l.peekChar1 = l.lastChar - l.peekEOF1 = l.reachedEOF - l.lastChar = l.prevChar1 - l.reachedEOF = l.prevEOF1 - l.prevChar1 = l.prevChar2 - l.prevEOF1 = l.pervEOF2 - l.prevChar2 = nullChar - l.pervEOF2 = false - return nil -} diff --git a/grammar/lexical/parser/lexer_test.go b/grammar/lexical/parser/lexer_test.go deleted file mode 100644 index 055466e..0000000 --- a/grammar/lexical/parser/lexer_test.go +++ /dev/null @@ -1,524 +0,0 @@ -package parser - -import ( - "strings" - "testing" -) - -func TestLexer(t *testing.T) { - tests := []struct { - caption string - src string - tokens []*token - err error - }{ - { - caption: "lexer 
can recognize ordinaly characters", - src: "123abcいろは", - tokens: []*token{ - newToken(tokenKindChar, '1'), - newToken(tokenKindChar, '2'), - newToken(tokenKindChar, '3'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, 'b'), - newToken(tokenKindChar, 'c'), - newToken(tokenKindChar, 'い'), - newToken(tokenKindChar, 'ろ'), - newToken(tokenKindChar, 'は'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in default mode", - src: ".*+?|()[\\u", - tokens: []*token{ - newToken(tokenKindAnyChar, nullChar), - newToken(tokenKindRepeat, nullChar), - newToken(tokenKindRepeatOneOrMore, nullChar), - newToken(tokenKindOption, nullChar), - newToken(tokenKindAlt, nullChar), - newToken(tokenKindGroupOpen, nullChar), - newToken(tokenKindGroupClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in default mode", - src: "\\\\\\.\\*\\+\\?\\|\\(\\)\\[", - tokens: []*token{ - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "], {, and } are treated as an ordinary character in default mode", - src: "]{}", - tokens: []*token{ - newToken(tokenKindChar, ']'), - newToken(tokenKindChar, '{'), - newToken(tokenKindChar, '}'), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters in bracket expression mode", - src: "[a-z\\u{09AF}][^a-z\\u{09abcf}]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - 
newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09AF"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("09abcf"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the escape sequences in bracket expression mode", - src: "[\\^a\\-z]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "in a bracket expression, the special characters are also handled as normal characters", - src: "[\\\\.*+?|()[", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '\\'), - newToken(tokenKindChar, '.'), - newToken(tokenKindChar, '*'), - newToken(tokenKindChar, '+'), - newToken(tokenKindChar, '?'), - newToken(tokenKindChar, '|'), - newToken(tokenKindChar, '('), - newToken(tokenKindChar, ')'), - newToken(tokenKindChar, '['), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", - // [...-...][...-][-...][-] - // ~~~~~~~ ~ ~ ~ - // ^ ^ ^ ^ - // | | | `-- Ordinary Character (b) - // | | `-- Ordinary Character (b) - // | `-- Ordinary Character (b) - // `-- Character Range (a) - // - // a. *-* is handled as a character-range expression. - // b. *-, -*, or - are handled as ordinary characters. 
- src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, 'a'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, 'z'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindCharRange, nullChar), - newToken(tokenKindChar, '-'), - newToken(tokenKindBExpClose, nullChar), 
- - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", - // [^...^...][^] - // ~~ ~ ~~ - // ^ ^ ^^ - // | | |`-- Ordinary Character (c) - // | | `-- Bracket Expression - // | `-- Ordinary Character (b) - // `-- Inverse Bracket Expression (a) - // - // a. Bracket expressions that have a caret symbol at the beginning are handled as logical inverse expressions. - // b. caret symbols that appear as the second and the subsequent symbols are handled as ordinary symbols. - // c. When a bracket expression has just one symbol, a caret symbol at the beginning is handled as an ordinary character. - src: "[^^][^]", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindChar, '^'), - newToken(tokenKindBExpClose, nullChar), - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "\\@", - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "\\", - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer raises an error when an invalid escape sequence appears", - src: "[\\@", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "lexer raises an error when the incomplete escape sequence (EOF following \\) appears", - src: "[\\", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrIncompletedEscSeq, - }, - { - caption: "lexer can recognize the special characters and code points in code point expression mode", - src: 
"\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}[\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}][^\\u{0123}\\u{4567}\\u{89abcd}\\u{efAB}\\u{CDEF01}]", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - 
newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("4567"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("89abcd"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("efAB"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("CDEF01"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a one digit hex string isn't a valid code point", - src: "\\u{0", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a two digits hex string isn't a valid code point", - src: "\\u{01", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a three digits hex string isn't a valid code point", - src: "\\u{012", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a four digits hex string is a valid code point", - src: "\\u{0123}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("0123"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a five digits hex string isn't a valid code point", - src: "\\u{01234", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a six digits hex string is a 
valid code point", - src: "\\u{012345}", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCodePointToken("012345"), - newToken(tokenKindRBrace, nullChar), - }, - }, - { - caption: "a seven digits hex string isn't a valid code point", - src: "\\u{0123456", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{g", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "a code point must be hex digits", - src: "\\u{G", - tokens: []*token{ - newToken(tokenKindCodePointLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - }, - err: synErrInvalidCodePoint, - }, - { - caption: "lexer can recognize the special characters and symbols in character property expression mode", - src: "\\p{Letter}\\p{General_Category=Letter}[\\p{Letter}\\p{General_Category=Letter}][^\\p{Letter}\\p{General_Category=Letter}]", - tokens: []*token{ - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - 
newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindInverseBExpOpen, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindCharPropLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newCharPropSymbolToken("General_Category"), - newToken(tokenKindEqual, nullChar), - newCharPropSymbolToken("Letter"), - newToken(tokenKindRBrace, nullChar), - newToken(tokenKindBExpClose, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "lexer can recognize the special characters and symbols in fragment expression mode", - src: "\\f{integer}", - tokens: []*token{ - newToken(tokenKindFragmentLeader, nullChar), - newToken(tokenKindLBrace, nullChar), - newFragmentSymbolToken("integer"), - newToken(tokenKindRBrace, nullChar), - - newToken(tokenKindEOF, nullChar), - }, - }, - { - caption: "a fragment expression is not supported in a bracket expression", - src: "[\\f", - tokens: []*token{ - newToken(tokenKindBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - { - caption: "a fragment expression is not supported in an inverse bracket expression", - src: "[^\\f", - tokens: []*token{ - newToken(tokenKindInverseBExpOpen, nullChar), - }, - err: synErrInvalidEscSeq, - }, - } - for _, tt := range tests { - t.Run(tt.caption, func(t *testing.T) { - lex := newLexer(strings.NewReader(tt.src)) - var err error - var tok *token - i := 0 - for { - tok, err = lex.next() - if err != nil { - break - } - if i >= len(tt.tokens) { - break - } - eTok := tt.tokens[i] - i++ - testToken(t, tok, eTok) - - if tok.kind == tokenKindEOF { - break - } - } - if tt.err != nil { - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - detail, cause := lex.error() - if cause != tt.err { - t.Fatalf("unexpected error: want: %v, got: %v (%v)", tt.err, cause, detail) - 
} - } else { - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - } - if i < len(tt.tokens) { - t.Fatalf("expecte more tokens") - } - }) - } -} - -func testToken(t *testing.T, a, e *token) { - t.Helper() - if e.kind != a.kind || e.char != a.char || e.codePoint != a.codePoint { - t.Fatalf("unexpected token: want: %+v, got: %+v", e, a) - } -} diff --git a/grammar/lexical/parser/parser.go b/grammar/lexical/parser/parser.go deleted file mode 100644 index d1a08a2..0000000 --- a/grammar/lexical/parser/parser.go +++ /dev/null @@ -1,531 +0,0 @@ -package parser - -import ( - "bytes" - "fmt" - "io" - "strconv" - - spec "spec/grammar" - "ucd" -) - -type PatternEntry struct { - ID spec.LexModeKindID - Pattern []byte -} - -type parser struct { - kind spec.LexKindName - lex *lexer - peekedTok *token - lastTok *token - - // If and only if isContributoryPropertyExposed is true, the parser interprets contributory properties that - // appear in property expressions. - // - // The contributory properties are not exposed, and users cannot use those properties because the parser - // follows [UAX #44 5.13 Property APIs]. For instance, \p{Other_Alphabetic} is invalid. - // - // isContributoryPropertyExposed is set to true when the parser is generated recursively. The parser needs to - // interpret derived properties internally because the derived properties consist of other properties that - // may contain the contributory properties. - // - // [UAX #44 5.13 Property APIs] says: - // > The following subtypes of Unicode character properties should generally not be exposed in APIs, - // > except in limited circumstances. They may not be useful, particularly in public API collections, - // > and may instead prove misleading to the users of such API collections. - // > * Contributory properties are not recommended for public APIs. - // > ... 
- // https://unicode.org/reports/tr44/#Property_APIs - isContributoryPropertyExposed bool - - errCause error - errDetail string -} - -func NewParser(kind spec.LexKindName, src io.Reader) *parser { - return &parser{ - kind: kind, - lex: newLexer(src), - isContributoryPropertyExposed: false, - } -} - -func (p *parser) exposeContributoryProperty() { - p.isContributoryPropertyExposed = true -} - -func (p *parser) Error() (string, error) { - return p.errDetail, p.errCause -} - -func (p *parser) Parse() (root CPTree, retErr error) { - defer func() { - err := recover() - if err != nil { - var ok bool - retErr, ok = err.(error) - if !ok { - panic(err) - } - return - } - }() - - return newRootNode(p.kind, p.parseRegexp()), nil -} - -func (p *parser) parseRegexp() CPTree { - alt := p.parseAlt() - if alt == nil { - if p.consume(tokenKindGroupClose) { - p.raiseParseError(synErrGroupNoInitiator, "") - } - p.raiseParseError(synErrNullPattern, "") - } - if p.consume(tokenKindGroupClose) { - p.raiseParseError(synErrGroupNoInitiator, "") - } - p.expect(tokenKindEOF) - return alt -} - -func (p *parser) parseAlt() CPTree { - left := p.parseConcat() - if left == nil { - if p.consume(tokenKindAlt) { - p.raiseParseError(synErrAltLackOfOperand, "") - } - return nil - } - for { - if !p.consume(tokenKindAlt) { - break - } - right := p.parseConcat() - if right == nil { - p.raiseParseError(synErrAltLackOfOperand, "") - } - left = newAltNode(left, right) - } - return left -} - -func (p *parser) parseConcat() CPTree { - left := p.parseRepeat() - for { - right := p.parseRepeat() - if right == nil { - break - } - left = newConcatNode(left, right) - } - return left -} - -func (p *parser) parseRepeat() CPTree { - group := p.parseGroup() - if group == nil { - if p.consume(tokenKindRepeat) { - p.raiseParseError(synErrRepNoTarget, "* needs an operand") - } - if p.consume(tokenKindRepeatOneOrMore) { - p.raiseParseError(synErrRepNoTarget, "+ needs an operand") - } - if p.consume(tokenKindOption) { - 
p.raiseParseError(synErrRepNoTarget, "? needs an operand") - } - return nil - } - if p.consume(tokenKindRepeat) { - return newRepeatNode(group) - } - if p.consume(tokenKindRepeatOneOrMore) { - return newRepeatOneOrMoreNode(group) - } - if p.consume(tokenKindOption) { - return newOptionNode(group) - } - return group -} - -func (p *parser) parseGroup() CPTree { - if p.consume(tokenKindGroupOpen) { - alt := p.parseAlt() - if alt == nil { - if p.consume(tokenKindEOF) { - p.raiseParseError(synErrGroupUnclosed, "") - } - p.raiseParseError(synErrGroupNoElem, "") - } - if p.consume(tokenKindEOF) { - p.raiseParseError(synErrGroupUnclosed, "") - } - if !p.consume(tokenKindGroupClose) { - p.raiseParseError(synErrGroupInvalidForm, "") - } - return alt - } - return p.parseSingleChar() -} - -func (p *parser) parseSingleChar() CPTree { - if p.consume(tokenKindAnyChar) { - return genAnyCharAST() - } - if p.consume(tokenKindBExpOpen) { - left := p.parseBExpElem() - if left == nil { - if p.consume(tokenKindEOF) { - p.raiseParseError(synErrBExpUnclosed, "") - } - p.raiseParseError(synErrBExpNoElem, "") - } - for { - right := p.parseBExpElem() - if right == nil { - break - } - left = newAltNode(left, right) - } - if p.consume(tokenKindEOF) { - p.raiseParseError(synErrBExpUnclosed, "") - } - p.expect(tokenKindBExpClose) - return left - } - if p.consume(tokenKindInverseBExpOpen) { - elem := p.parseBExpElem() - if elem == nil { - if p.consume(tokenKindEOF) { - p.raiseParseError(synErrBExpUnclosed, "") - } - p.raiseParseError(synErrBExpNoElem, "") - } - inverse := exclude(elem, genAnyCharAST()) - if inverse == nil { - p.raiseParseError(synErrUnmatchablePattern, "") - } - for { - elem := p.parseBExpElem() - if elem == nil { - break - } - inverse = exclude(elem, inverse) - if inverse == nil { - p.raiseParseError(synErrUnmatchablePattern, "") - } - } - if p.consume(tokenKindEOF) { - p.raiseParseError(synErrBExpUnclosed, "") - } - p.expect(tokenKindBExpClose) - return inverse - } - if 
p.consume(tokenKindCodePointLeader) { - return p.parseCodePoint() - } - if p.consume(tokenKindCharPropLeader) { - return p.parseCharProp() - } - if p.consume(tokenKindFragmentLeader) { - return p.parseFragment() - } - c := p.parseNormalChar() - if c == nil { - if p.consume(tokenKindBExpClose) { - p.raiseParseError(synErrBExpInvalidForm, "") - } - return nil - } - return c -} - -func (p *parser) parseBExpElem() CPTree { - var left CPTree - switch { - case p.consume(tokenKindCodePointLeader): - left = p.parseCodePoint() - case p.consume(tokenKindCharPropLeader): - left = p.parseCharProp() - if p.consume(tokenKindCharRange) { - p.raiseParseError(synErrRangePropIsUnavailable, "") - } - default: - left = p.parseNormalChar() - } - if left == nil { - return nil - } - if !p.consume(tokenKindCharRange) { - return left - } - var right CPTree - switch { - case p.consume(tokenKindCodePointLeader): - right = p.parseCodePoint() - case p.consume(tokenKindCharPropLeader): - p.raiseParseError(synErrRangePropIsUnavailable, "") - default: - right = p.parseNormalChar() - } - if right == nil { - p.raiseParseError(synErrRangeInvalidForm, "") - } - from, _, _ := left.Range() - _, to, _ := right.Range() - if !isValidOrder(from, to) { - p.raiseParseError(synErrRangeInvalidOrder, fmt.Sprintf("%X..%X", from, to)) - } - return newRangeSymbolNode(from, to) -} - -func (p *parser) parseCodePoint() CPTree { - if !p.consume(tokenKindLBrace) { - p.raiseParseError(synErrCPExpInvalidForm, "") - } - if !p.consume(tokenKindCodePoint) { - p.raiseParseError(synErrCPExpInvalidForm, "") - } - - n, err := strconv.ParseInt(p.lastTok.codePoint, 16, 64) - if err != nil { - panic(fmt.Errorf("failed to decode a code point (%v) into a int: %v", p.lastTok.codePoint, err)) - } - if n < 0x0000 || n > 0x10FFFF { - p.raiseParseError(synErrCPExpOutOfRange, "") - } - - sym := newSymbolNode(rune(n)) - - if !p.consume(tokenKindRBrace) { - p.raiseParseError(synErrCPExpInvalidForm, "") - } - - return sym -} - -func (p 
*parser) parseCharProp() CPTree { - if !p.consume(tokenKindLBrace) { - p.raiseParseError(synErrCharPropExpInvalidForm, "") - } - var sym1, sym2 string - if !p.consume(tokenKindCharPropSymbol) { - p.raiseParseError(synErrCharPropExpInvalidForm, "") - } - sym1 = p.lastTok.propSymbol - if p.consume(tokenKindEqual) { - if !p.consume(tokenKindCharPropSymbol) { - p.raiseParseError(synErrCharPropExpInvalidForm, "") - } - sym2 = p.lastTok.propSymbol - } - - var alt CPTree - var propName, propVal string - if sym2 != "" { - propName = sym1 - propVal = sym2 - } else { - propName = "" - propVal = sym1 - } - if !p.isContributoryPropertyExposed && ucd.IsContributoryProperty(propName) { - p.raiseParseError(synErrCharPropUnsupported, propName) - } - pat, err := ucd.NormalizeCharacterProperty(propName, propVal) - if err != nil { - p.raiseParseError(synErrCharPropUnsupported, err.Error()) - } - if pat != "" { - p := NewParser(p.kind, bytes.NewReader([]byte(pat))) - p.exposeContributoryProperty() - ast, err := p.Parse() - if err != nil { - panic(err) - } - alt = ast - } else { - cpRanges, inverse, err := ucd.FindCodePointRanges(propName, propVal) - if err != nil { - p.raiseParseError(synErrCharPropUnsupported, err.Error()) - } - if inverse { - r := cpRanges[0] - alt = exclude(newRangeSymbolNode(r.From, r.To), genAnyCharAST()) - if alt == nil { - p.raiseParseError(synErrUnmatchablePattern, "") - } - for _, r := range cpRanges[1:] { - alt = exclude(newRangeSymbolNode(r.From, r.To), alt) - if alt == nil { - p.raiseParseError(synErrUnmatchablePattern, "") - } - } - } else { - for _, r := range cpRanges { - alt = genAltNode( - alt, - newRangeSymbolNode(r.From, r.To), - ) - } - } - } - - if !p.consume(tokenKindRBrace) { - p.raiseParseError(synErrCharPropExpInvalidForm, "") - } - - return alt -} - -func (p *parser) parseFragment() CPTree { - if !p.consume(tokenKindLBrace) { - p.raiseParseError(synErrFragmentExpInvalidForm, "") - } - if !p.consume(tokenKindFragmentSymbol) { - 
p.raiseParseError(synErrFragmentExpInvalidForm, "") - } - sym := p.lastTok.fragmentSymbol - - if !p.consume(tokenKindRBrace) { - p.raiseParseError(synErrFragmentExpInvalidForm, "") - } - - return newFragmentNode(spec.LexKindName(sym), nil) -} - -func (p *parser) parseNormalChar() CPTree { - if !p.consume(tokenKindChar) { - return nil - } - return newSymbolNode(p.lastTok.char) -} - -func exclude(symbol, base CPTree) CPTree { - if left, right, ok := symbol.Alternatives(); ok { - return exclude(right, exclude(left, base)) - } - - if left, right, ok := base.Alternatives(); ok { - return genAltNode( - exclude(symbol, left), - exclude(symbol, right), - ) - } - - if bFrom, bTo, ok := base.Range(); ok { - sFrom, sTo, ok := symbol.Range() - if !ok { - panic(fmt.Errorf("invalid symbol tree: %T", symbol)) - } - - switch { - case sFrom > bFrom && sTo < bTo: - return genAltNode( - newRangeSymbolNode(bFrom, sFrom-1), - newRangeSymbolNode(sTo+1, bTo), - ) - case sFrom <= bFrom && sTo >= bFrom && sTo < bTo: - return newRangeSymbolNode(sTo+1, bTo) - case sFrom > bFrom && sFrom <= bTo && sTo >= bTo: - return newRangeSymbolNode(bFrom, sFrom-1) - case sFrom <= bFrom && sTo >= bTo: - return nil - default: - return base - } - } - - panic(fmt.Errorf("invalid base tree: %T", base)) -} - -func genAnyCharAST() CPTree { - return newRangeSymbolNode(0x0, 0x10FFFF) -} - -func isValidOrder(from, to rune) bool { - return from <= to -} - -func genConcatNode(cs ...CPTree) CPTree { - nonNilNodes := []CPTree{} - for _, c := range cs { - if c == nil { - continue - } - nonNilNodes = append(nonNilNodes, c) - } - if len(nonNilNodes) <= 0 { - return nil - } - if len(nonNilNodes) == 1 { - return nonNilNodes[0] - } - concat := newConcatNode(nonNilNodes[0], nonNilNodes[1]) - for _, c := range nonNilNodes[2:] { - concat = newConcatNode(concat, c) - } - return concat -} - -func genAltNode(cs ...CPTree) CPTree { - nonNilNodes := []CPTree{} - for _, c := range cs { - if c == nil { - continue - } - nonNilNodes = 
append(nonNilNodes, c) - } - if len(nonNilNodes) <= 0 { - return nil - } - if len(nonNilNodes) == 1 { - return nonNilNodes[0] - } - alt := newAltNode(nonNilNodes[0], nonNilNodes[1]) - for _, c := range nonNilNodes[2:] { - alt = newAltNode(alt, c) - } - return alt -} - -func (p *parser) expect(expected tokenKind) { - if !p.consume(expected) { - tok := p.peekedTok - p.raiseParseError(synErrUnexpectedToken, fmt.Sprintf("expected: %v, actual: %v", expected, tok.kind)) - } -} - -func (p *parser) consume(expected tokenKind) bool { - var tok *token - var err error - if p.peekedTok != nil { - tok = p.peekedTok - p.peekedTok = nil - } else { - tok, err = p.lex.next() - if err != nil { - if err == ParseErr { - detail, cause := p.lex.error() - p.raiseParseError(cause, detail) - } - panic(err) - } - } - p.lastTok = tok - if tok.kind == expected { - return true - } - p.peekedTok = tok - p.lastTok = nil - - return false -} - -func (p *parser) raiseParseError(err error, detail string) { - p.errCause = err - p.errDetail = detail - panic(ParseErr) -} diff --git a/grammar/lexical/parser/parser_test.go b/grammar/lexical/parser/parser_test.go deleted file mode 100644 index e876d3b..0000000 --- a/grammar/lexical/parser/parser_test.go +++ /dev/null @@ -1,1389 +0,0 @@ -package parser - -import ( - "fmt" - "reflect" - "strings" - "testing" - - spec "spec/grammar" - "ucd" -) - -func TestParse(t *testing.T) { - tests := []struct { - pattern string - fragments map[spec.LexKindName]string - ast CPTree - syntaxError error - - // When an AST is large, as patterns containing a character property expression, this test only checks - // that the pattern is parsable. The check of the validity of such AST is performed by checking that it - // can be matched correctly using the driver. 
- skipTestAST bool - }{ - { - pattern: "a", - ast: newSymbolNode('a'), - }, - { - pattern: "abc", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "a?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}?", - ast: newOptionNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}?", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}?", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newOptionNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a)?", - ast: newOptionNode( - newSymbolNode('a'), - ), - }, - { - pattern: "((a?)?)?", - ast: newOptionNode( - newOptionNode( - newOptionNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)?", - ast: newOptionNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)?", - ast: newOptionNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(?)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|?", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "?|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a??", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a*", - ast: newRepeatNode( - newSymbolNode('a'), - ), - }, - { - pattern: "[abc]*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\u{3042}*", - ast: newRepeatNode( - newSymbolNode('\u3042'), - ), - }, - { - pattern: "\\p{Letter}*", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}*", - fragments: 
map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "((a*)*)*", - ast: newRepeatNode( - newRepeatNode( - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - }, - { - pattern: "(abc)*", - ast: newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "(a|b)*", - ast: newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - }, - { - pattern: "*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(*)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|*", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "*|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a**", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a+", - ast: genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - }, - { - pattern: "[abc]+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "\\u{3042}+", - ast: genConcatNode( - newSymbolNode('\u3042'), - newRepeatNode( - newSymbolNode('\u3042'), - ), - ), - }, - { - pattern: "\\p{Letter}+", - skipTestAST: true, - }, - { - pattern: "\\f{a2c}+", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: genConcatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - newRepeatNode( - newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - ), - }, - { - pattern: "((a+)+)+", - ast: genConcatNode( - genConcatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - 
), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - newRepeatNode( - genConcatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newRepeatNode( - newSymbolNode('a'), - ), - ), - ), - ), - ), - ), - ), - }, - { - pattern: "(abc)+", - ast: genConcatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - newRepeatNode( - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - ), - }, - { - pattern: "(a|b)+", - ast: genConcatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - newRepeatNode( - genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - ), - ), - ), - }, - { - pattern: "+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "(+)", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a|+", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "+|b", - syntaxError: synErrRepNoTarget, - }, - { - pattern: "a++", - syntaxError: synErrRepNoTarget, - }, - { - pattern: ".", - ast: newRangeSymbolNode(0x00, 0x10FFFF), - }, - { - pattern: "[a]", - ast: newSymbolNode('a'), - }, - { - pattern: "[abc]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - }, - { - pattern: "[a-z]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[A-Za-z]", - ast: genAltNode( - newRangeSymbolNode('A', 'Z'), - newRangeSymbolNode('a', 'z'), - ), - }, - { - pattern: "[\\u{004E}]", - ast: newSymbolNode('N'), - }, - { - pattern: "[\\u{0061}-\\u{007A}]", - ast: newRangeSymbolNode('a', 'z'), - }, - { - pattern: "[\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[\\p{Lu}-\\p{Ll}]", - syntaxError: 
synErrRangePropIsUnavailable, - }, - { - pattern: "[z-a]", - syntaxError: synErrRangeInvalidOrder, - }, - { - pattern: "a[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]a", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[]", - syntaxError: synErrBExpNoElem, - }, - { - pattern: "[^\\u{004E}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u004E'-1), - newRangeSymbolNode('\u004E'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\u{0061}-\\u{007A}]", - ast: genAltNode( - newRangeSymbolNode(0x00, '\u0061'-1), - newRangeSymbolNode('\u007A'+1, 0x10FFFF), - ), - }, - { - pattern: "[^\\p{Lu}]", - skipTestAST: true, - }, - { - pattern: "[^a-\\p{Lu}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-z]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\p{Lu}-\\p{Ll}]", - syntaxError: synErrRangePropIsUnavailable, - }, - { - pattern: "[^\\u{0000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^\\u{0000}-\\u{FFFF}\\u{010000}-\\u{10FFFF}]", - syntaxError: synErrUnmatchablePattern, - }, - { - pattern: "[^]", - ast: newSymbolNode('^'), - }, - { - pattern: "[", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([^a-", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "]", - ast: newSymbolNode(']'), - }, - { - pattern: "(]", - syntaxError: synErrGroupUnclosed, - }, - { 
- pattern: "a]", - ast: genConcatNode( - newSymbolNode('a'), - newSymbolNode(']'), - ), - }, - { - pattern: "(a]", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "([)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "([a)", - syntaxError: synErrBExpUnclosed, - }, - { - pattern: "[a-]", - ast: genAltNode( - newSymbolNode('a'), - newSymbolNode('-'), - ), - }, - { - pattern: "[^a-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x60), - newRangeSymbolNode(0x62, 0x10FFFF), - ), - }, - { - pattern: "[-z]", - ast: genAltNode( - newSymbolNode('-'), - newSymbolNode('z'), - ), - }, - { - pattern: "[^-z]", - ast: newAltNode( - newRangeSymbolNode(0x00, 0x2C), - newAltNode( - newRangeSymbolNode(0x2E, 0x79), - newRangeSymbolNode(0x7B, 0x10FFFF), - ), - ), - }, - { - pattern: "[-]", - ast: newSymbolNode('-'), - }, - { - pattern: "[^-]", - ast: genAltNode( - newRangeSymbolNode(0x00, 0x2C), - newRangeSymbolNode(0x2E, 0x10FFFF), - ), - }, - { - pattern: "[^01]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^10]", - ast: genAltNode( - newRangeSymbolNode(0x00, '0'-1), - newRangeSymbolNode('1'+1, 0x10FFFF), - ), - }, - { - pattern: "[^a-z]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - }, - { - pattern: "[^az]", - ast: genAltNode( - newRangeSymbolNode(0x00, 'a'-1), - genAltNode( - newRangeSymbolNode('a'+1, 'z'-1), - newRangeSymbolNode('z'+1, 0x10FFFF), - ), - ), - }, - { - pattern: "\\u{006E}", - ast: newSymbolNode('\u006E'), - }, - { - pattern: "\\u{03BD}", - ast: newSymbolNode('\u03BD'), - }, - { - pattern: "\\u{306B}", - ast: newSymbolNode('\u306B'), - }, - { - pattern: "\\u{01F638}", - ast: newSymbolNode('\U0001F638'), - }, - { - pattern: "\\u{0000}", - ast: newSymbolNode('\u0000'), - }, - { - pattern: "\\u{10FFFF}", - ast: newSymbolNode('\U0010FFFF'), - }, - { - pattern: "\\u{110000}", - syntaxError: 
synErrCPExpOutOfRange, - }, - { - pattern: "\\u", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{03BD", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\u{}", - syntaxError: synErrCPExpInvalidForm, - }, - { - pattern: "\\p{Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{General_Category=Letter}", - skipTestAST: true, - }, - { - pattern: "\\p{ Letter }", - skipTestAST: true, - }, - { - pattern: "\\p{ General_Category = Letter }", - skipTestAST: true, - }, - { - pattern: "\\p", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{Letter", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{General_Category= }", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=Letter}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{ =Letter}", - syntaxError: synErrCharPropInvalidSymbol, - }, - { - pattern: "\\p{=}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\p{}", - syntaxError: synErrCharPropExpInvalidForm, - }, - { - pattern: "\\f{a2c}", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f{ a2c }", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - ast: newFragmentNode("a2c", - genConcatNode( - newSymbolNode('a'), - newSymbolNode('b'), - newSymbolNode('c'), - ), - ), - }, - { - pattern: "\\f", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{", - syntaxError: synErrFragmentExpInvalidForm, - }, - { - pattern: "\\f{a2c", - fragments: map[spec.LexKindName]string{ - "a2c": "abc", - }, - syntaxError: synErrFragmentExpInvalidForm, - 
}, - { - pattern: "(a)", - ast: newSymbolNode('a'), - }, - { - pattern: "(((a)))", - ast: newSymbolNode('a'), - }, - { - pattern: "a()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()a", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "()", - syntaxError: synErrGroupNoElem, - }, - { - pattern: "(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "a(", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "(a", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: "((a)", - syntaxError: synErrGroupUnclosed, - }, - { - pattern: ")", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "a)", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: ")a", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "(a))", - syntaxError: synErrGroupNoInitiator, - }, - { - pattern: "Mulder|Scully", - ast: genAltNode( - genConcatNode( - newSymbolNode('M'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('d'), - newSymbolNode('e'), - newSymbolNode('r'), - ), - genConcatNode( - newSymbolNode('S'), - newSymbolNode('c'), - newSymbolNode('u'), - newSymbolNode('l'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - ), - }, - { - pattern: "Langly|Frohike|Byers", - ast: genAltNode( - genConcatNode( - newSymbolNode('L'), - newSymbolNode('a'), - newSymbolNode('n'), - newSymbolNode('g'), - newSymbolNode('l'), - newSymbolNode('y'), - ), - genConcatNode( - newSymbolNode('F'), - newSymbolNode('r'), - newSymbolNode('o'), - newSymbolNode('h'), - newSymbolNode('i'), - newSymbolNode('k'), - newSymbolNode('e'), - ), - genConcatNode( - newSymbolNode('B'), - newSymbolNode('y'), - newSymbolNode('e'), - newSymbolNode('r'), - newSymbolNode('s'), - ), - ), - }, - { - pattern: "|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "||", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Mulder|", - 
syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Scully", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Langly||Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|Byers", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "|Frohike|", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "(Fox|)Mulder", - syntaxError: synErrAltLackOfOperand, - }, - { - pattern: "Fox(|Mulder)", - syntaxError: synErrAltLackOfOperand, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v %v", i, tt.pattern), func(t *testing.T) { - fragmentTrees := map[spec.LexKindName]CPTree{} - for kind, pattern := range tt.fragments { - p := NewParser(kind, strings.NewReader(pattern)) - root, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - fragmentTrees[kind] = root - } - err := CompleteFragments(fragmentTrees) - if err != nil { - t.Fatal(err) - } - - p := NewParser(spec.LexKindName("test"), strings.NewReader(tt.pattern)) - root, err := p.Parse() - if tt.syntaxError != nil { - // printCPTree(os.Stdout, root, "", "") - if err != ParseErr { - t.Fatalf("unexpected error: want: %v, got: %v", ParseErr, err) - } - _, synErr := p.Error() - if synErr != tt.syntaxError { - t.Fatalf("unexpected syntax error: want: %v, got: %v", tt.syntaxError, synErr) - } - if root != nil { - t.Fatalf("tree must be nil") - } - } else { - if err != nil { - detail, cause := p.Error() - t.Fatalf("%v: %v: %v", err, cause, detail) - } - if root == nil { - t.Fatal("tree must be non-nil") - } - - complete, err := ApplyFragments(root, fragmentTrees) - if err != nil { - t.Fatal(err) - } - if !complete { - t.Fatalf("incomplete fragments") - } - - // printCPTree(os.Stdout, root, "", "") - if !tt.skipTestAST { - r := root.(*rootNode) - testAST(t, tt.ast, r.tree) - } - } - }) - } -} - -func 
TestParse_ContributoryPropertyIsNotExposed(t *testing.T) { - for _, cProp := range ucd.ContributoryProperties() { - t.Run(fmt.Sprintf("%v", cProp), func(t *testing.T) { - p := NewParser(spec.LexKindName("test"), strings.NewReader(fmt.Sprintf(`\p{%v=yes}`, cProp))) - root, err := p.Parse() - if err == nil { - t.Fatalf("expected syntax error: got: nil") - } - _, synErr := p.Error() - if synErr != synErrCharPropUnsupported { - t.Fatalf("unexpected syntax error: want: %v, got: %v", synErrCharPropUnsupported, synErr) - } - if root != nil { - t.Fatalf("tree is not nil") - } - }) - } -} - -func TestExclude(t *testing.T) { - for _, test := range []struct { - caption string - target CPTree - base CPTree - result CPTree - }{ - // t.From > b.From && t.To < b.To - - // |t.From - b.From| = 1 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.To| = 1", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '2'), - result: newAltNode( - newSymbolNode('0'), - newSymbolNode('2'), - ), - }, - // |t.From - b.From| > 1 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+--+--+ - // Result (b - t): +--+--+ +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.To| > 1", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '4'), - result: newAltNode( - newRangeSymbolNode('0', '1'), - newRangeSymbolNode('3', '4'), - ), - }, - - // t.From <= b.From && t.To >= b.From && t.To < b.To - - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - 
// Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newSymbolNode('0'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('1', '2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| = 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| = 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '2'), - result: newSymbolNode('2'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| = 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| = 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '1'), - base: newRangeSymbolNode('1', '3'), - result: newRangeSymbolNode('2', '3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| = 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| = 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '3'), - result: 
newSymbolNode('3'), - }, - // |b.From - t.From| > 0 - // |t.To - b.From| > 0 - // |b.To - t.To| > 1 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|b.From - t.From| > 0 && |t.To - b.From| > 0 && |b.To - t.To| > 1", - target: newRangeSymbolNode('0', '2'), - base: newRangeSymbolNode('1', '4'), - result: newRangeSymbolNode('3', '4'), - }, - - // t.From > b.From && t.From <= b.To && t.To >= b.To - - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('1'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '1'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('1', '2'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| = 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+ - { - caption: "|t.From - b.From| = 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('1', '3'), - base: newRangeSymbolNode('0', '2'), - result: newSymbolNode('0'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+--+--+ - // Result (b 
- t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('2'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '2'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('2', '3'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - // |t.From - b.From| > 1 - // |b.To - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+--+--+--+ - // Result (b - t): +--+--+ - { - caption: "|t.From - b.From| > 1 && |b.To - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('2', '4'), - base: newRangeSymbolNode('0', '3'), - result: newRangeSymbolNode('0', '1'), - }, - - // t.From <= b.From && t.To >= b.To - - // |b.From - t.From| = 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| = 0", - target: newSymbolNode('0'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| = 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| = 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('0'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| = 0 - // - // Target (t): +--+--+ - // Base (b): +--+ - 
// Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| = 0", - target: newRangeSymbolNode('0', '1'), - base: newSymbolNode('1'), - result: nil, - }, - // |b.From - t.From| > 0 - // |t.To - b.To| > 0 - // - // Target (t): +--+--+--+ - // Base (b): +--+ - // Result (b - t): N/A - { - caption: "|b.From - t.From| > 0 && |t.To - b.To| > 0", - target: newRangeSymbolNode('0', '2'), - base: newSymbolNode('1'), - result: nil, - }, - - // Others - - // |b.From - t.From| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| = 1", - target: newSymbolNode('0'), - base: newSymbolNode('1'), - result: newSymbolNode('1'), - }, - // |b.From - t.From| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|b.From - t.From| > 1", - target: newSymbolNode('0'), - base: newSymbolNode('2'), - result: newSymbolNode('2'), - }, - // |t.To - b.To| = 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| = 1", - target: newSymbolNode('1'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - // |t.To - b.To| > 1 - // - // Target (t): +--+ - // Base (b): +--+ - // Result (b - t): +--+ - { - caption: "|t.To - b.To| > 1", - target: newSymbolNode('2'), - base: newSymbolNode('0'), - result: newSymbolNode('0'), - }, - } { - t.Run(test.caption, func(t *testing.T) { - r := exclude(test.target, test.base) - testAST(t, test.result, r) - }) - } -} - -func testAST(t *testing.T, expected, actual CPTree) { - t.Helper() - - aTy := reflect.TypeOf(actual) - eTy := reflect.TypeOf(expected) - if eTy != aTy { - t.Fatalf("unexpected node: want: %+v, got: %+v", eTy, aTy) - } - - if actual == nil { - return - } - - switch e := expected.(type) { - case *symbolNode: - a := actual.(*symbolNode) - if a.From != e.From || a.To != e.To { - t.Fatalf("unexpected node: want: %+v, got: %+v", e, a) - } - } - eLeft, eRight := expected.children() - 
aLeft, aRight := actual.children() - testAST(t, eLeft, aLeft) - testAST(t, eRight, aRight) -} diff --git a/grammar/lexical/parser/tree.go b/grammar/lexical/parser/tree.go deleted file mode 100644 index 0d64e1d..0000000 --- a/grammar/lexical/parser/tree.go +++ /dev/null @@ -1,459 +0,0 @@ -package parser - -import ( - "fmt" - "io" - "sort" - - spec "spec/grammar" -) - -type CPRange struct { - From rune - To rune -} - -type CPTree interface { - fmt.Stringer - Range() (rune, rune, bool) - Optional() (CPTree, bool) - Repeatable() (CPTree, bool) - Concatenation() (CPTree, CPTree, bool) - Alternatives() (CPTree, CPTree, bool) - Describe() (spec.LexKindName, []spec.LexKindName, error) - - children() (CPTree, CPTree) - clone() CPTree -} - -var ( - _ CPTree = &rootNode{} - _ CPTree = &symbolNode{} - _ CPTree = &concatNode{} - _ CPTree = &altNode{} - _ CPTree = &quantifierNode{} - _ CPTree = &fragmentNode{} -) - -type rootNode struct { - kind spec.LexKindName - tree CPTree - fragments map[spec.LexKindName][]*fragmentNode -} - -func newRootNode(kind spec.LexKindName, t CPTree) *rootNode { - fragments := map[spec.LexKindName][]*fragmentNode{} - collectFragments(t, fragments) - - return &rootNode{ - kind: kind, - tree: t, - fragments: fragments, - } -} - -func collectFragments(n CPTree, fragments map[spec.LexKindName][]*fragmentNode) { - if n == nil { - return - } - - if f, ok := n.(*fragmentNode); ok { - fragments[f.kind] = append(fragments[f.kind], f) - return - } - - l, r := n.children() - collectFragments(l, fragments) - collectFragments(r, fragments) -} - -func (n *rootNode) String() string { - return fmt.Sprintf("root: %v: %v fragments", n.kind, len(n.fragments)) -} - -func (n *rootNode) Range() (rune, rune, bool) { - return n.tree.Range() -} - -func (n *rootNode) Optional() (CPTree, bool) { - return n.tree.Optional() -} - -func (n *rootNode) Repeatable() (CPTree, bool) { - return n.tree.Repeatable() -} - -func (n *rootNode) Concatenation() (CPTree, CPTree, bool) { - 
return n.tree.Concatenation() -} - -func (n *rootNode) Alternatives() (CPTree, CPTree, bool) { - return n.tree.Alternatives() -} - -func (n *rootNode) Describe() (spec.LexKindName, []spec.LexKindName, error) { - var frags []spec.LexKindName - for f := range n.fragments { - frags = append(frags, spec.LexKindName(f)) - } - sort.Slice(frags, func(i, j int) bool { - return frags[i] < frags[j] - }) - - return n.kind, frags, nil -} - -func (n *rootNode) children() (CPTree, CPTree) { - return n.tree.children() -} - -func (n *rootNode) clone() CPTree { - return n.tree.clone() -} - -func (n *rootNode) incomplete() bool { - return len(n.fragments) > 0 -} - -func (n *rootNode) applyFragment(kind spec.LexKindName, fragment CPTree) error { - root, ok := fragment.(*rootNode) - if !ok { - return fmt.Errorf("applyFragment can take only *rootNode: %T", fragment) - } - if root.incomplete() { - return fmt.Errorf("fragment is incomplete") - } - - fs, ok := n.fragments[kind] - if !ok { - return nil - } - for _, f := range fs { - f.tree = root.clone() - } - delete(n.fragments, kind) - - return nil -} - -type symbolNode struct { - CPRange -} - -func newSymbolNode(cp rune) *symbolNode { - return &symbolNode{ - CPRange: CPRange{ - From: cp, - To: cp, - }, - } -} - -func newRangeSymbolNode(from, to rune) *symbolNode { - return &symbolNode{ - CPRange: CPRange{ - From: from, - To: to, - }, - } -} - -func (n *symbolNode) String() string { - return fmt.Sprintf("symbol: %X..%X", n.From, n.To) -} - -func (n *symbolNode) Range() (rune, rune, bool) { - return n.From, n.To, true -} - -func (n *symbolNode) Optional() (CPTree, bool) { - return nil, false -} - -func (n *symbolNode) Repeatable() (CPTree, bool) { - return nil, false -} - -func (n *symbolNode) Concatenation() (CPTree, CPTree, bool) { - return nil, nil, false -} - -func (n *symbolNode) Alternatives() (CPTree, CPTree, bool) { - return nil, nil, false -} - -func (n *symbolNode) Describe() (spec.LexKindName, []spec.LexKindName, error) { - 
return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n) -} - -func (n *symbolNode) children() (CPTree, CPTree) { - return nil, nil -} - -func (n *symbolNode) clone() CPTree { - return newRangeSymbolNode(n.From, n.To) -} - -type concatNode struct { - left CPTree - right CPTree -} - -func newConcatNode(left, right CPTree) *concatNode { - return &concatNode{ - left: left, - right: right, - } -} - -func (n *concatNode) String() string { - return "concat" -} - -func (n *concatNode) Range() (rune, rune, bool) { - return 0, 0, false -} - -func (n *concatNode) Optional() (CPTree, bool) { - return nil, false -} - -func (n *concatNode) Repeatable() (CPTree, bool) { - return nil, false -} - -func (n *concatNode) Concatenation() (CPTree, CPTree, bool) { - return n.left, n.right, true -} - -func (n *concatNode) Alternatives() (CPTree, CPTree, bool) { - return nil, nil, false -} - -func (n *concatNode) Describe() (spec.LexKindName, []spec.LexKindName, error) { - return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n) -} - -func (n *concatNode) children() (CPTree, CPTree) { - return n.left, n.right -} - -func (n *concatNode) clone() CPTree { - if n == nil { - return nil - } - return newConcatNode(n.left.clone(), n.right.clone()) -} - -type altNode struct { - left CPTree - right CPTree -} - -func newAltNode(left, right CPTree) *altNode { - return &altNode{ - left: left, - right: right, - } -} - -func (n *altNode) String() string { - return "alt" -} - -func (n *altNode) Range() (rune, rune, bool) { - return 0, 0, false -} - -func (n *altNode) Optional() (CPTree, bool) { - return nil, false -} - -func (n *altNode) Repeatable() (CPTree, bool) { - return nil, false -} - -func (n *altNode) Concatenation() (CPTree, CPTree, bool) { - return nil, nil, false -} - -func (n *altNode) Alternatives() (CPTree, CPTree, bool) { - return n.left, n.right, true -} - -func (n *altNode) Describe() (spec.LexKindName, []spec.LexKindName, error) { - return spec.LexKindNameNil, 
nil, fmt.Errorf("%T cannot describe", n) -} - -func (n *altNode) children() (CPTree, CPTree) { - return n.left, n.right -} - -func (n *altNode) clone() CPTree { - return newAltNode(n.left.clone(), n.right.clone()) -} - -type quantifierNode struct { - optional bool - repeatable bool - tree CPTree -} - -func (n *quantifierNode) String() string { - switch { - case n.repeatable: - return "repeatable (>= 0 times)" - case n.optional: - return "optional (0 or 1 times)" - default: - return "invalid quantifier" - } -} - -func newRepeatNode(t CPTree) *quantifierNode { - return &quantifierNode{ - repeatable: true, - tree: t, - } -} - -func newRepeatOneOrMoreNode(t CPTree) *concatNode { - return newConcatNode( - t, - &quantifierNode{ - repeatable: true, - tree: t.clone(), - }) -} - -func newOptionNode(t CPTree) *quantifierNode { - return &quantifierNode{ - optional: true, - tree: t, - } -} - -func (n *quantifierNode) Range() (rune, rune, bool) { - return 0, 0, false -} - -func (n *quantifierNode) Optional() (CPTree, bool) { - return n.tree, n.optional -} - -func (n *quantifierNode) Repeatable() (CPTree, bool) { - return n.tree, n.repeatable -} - -func (n *quantifierNode) Concatenation() (CPTree, CPTree, bool) { - return nil, nil, false -} - -func (n *quantifierNode) Alternatives() (CPTree, CPTree, bool) { - return nil, nil, false -} - -func (n *quantifierNode) Describe() (spec.LexKindName, []spec.LexKindName, error) { - return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n) -} - -func (n *quantifierNode) children() (CPTree, CPTree) { - return n.tree, nil -} - -func (n *quantifierNode) clone() CPTree { - if n.repeatable { - return newRepeatNode(n.tree.clone()) - } - return newOptionNode(n.tree.clone()) -} - -type fragmentNode struct { - kind spec.LexKindName - tree CPTree -} - -func newFragmentNode(kind spec.LexKindName, t CPTree) *fragmentNode { - return &fragmentNode{ - kind: kind, - tree: t, - } -} - -func (n *fragmentNode) String() string { - return 
fmt.Sprintf("fragment: %v", n.kind) -} - -func (n *fragmentNode) Range() (rune, rune, bool) { - return n.tree.Range() -} - -func (n *fragmentNode) Optional() (CPTree, bool) { - return n.tree.Optional() -} - -func (n *fragmentNode) Repeatable() (CPTree, bool) { - return n.tree.Repeatable() -} - -func (n *fragmentNode) Concatenation() (CPTree, CPTree, bool) { - return n.tree.Concatenation() -} - -func (n *fragmentNode) Alternatives() (CPTree, CPTree, bool) { - return n.tree.Alternatives() -} - -func (n *fragmentNode) Describe() (spec.LexKindName, []spec.LexKindName, error) { - return spec.LexKindNameNil, nil, fmt.Errorf("%T cannot describe", n) -} - -func (n *fragmentNode) children() (CPTree, CPTree) { - return n.tree.children() -} - -func (n *fragmentNode) clone() CPTree { - if n.tree == nil { - return newFragmentNode(n.kind, nil) - } - return newFragmentNode(n.kind, n.tree.clone()) -} - -//nolint:unused -func printCPTree(w io.Writer, t CPTree, ruledLine string, childRuledLinePrefix string) { - if t == nil { - return - } - fmt.Fprintf(w, "%v%v\n", ruledLine, t) - children := []CPTree{} - switch n := t.(type) { - case *rootNode: - children = append(children, n.tree) - case *fragmentNode: - children = append(children, n.tree) - default: - left, right := t.children() - if left != nil { - children = append(children, left) - } - if right != nil { - children = append(children, right) - } - } - num := len(children) - for i, child := range children { - line := "└─ " - if num > 1 { - if i == 0 { - line = "├─ " - } else if i < num-1 { - line = "│ " - } - } - prefix := "│ " - if i >= num-1 { - prefix = " " - } - printCPTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) - } -} diff --git a/grammar/lr0.go b/grammar/lr0.go deleted file mode 100644 index 7e5929a..0000000 --- a/grammar/lr0.go +++ /dev/null @@ -1,197 +0,0 @@ -package grammar - -import ( - "fmt" - "sort" - - "grammar/symbol" -) - -type lr0Automaton struct { - initialState kernelID - states 
map[kernelID]*lrState -} - -func genLR0Automaton(prods *productionSet, startSym symbol.Symbol, errSym symbol.Symbol) (*lr0Automaton, error) { - if !startSym.IsStart() { - return nil, fmt.Errorf("passed symbold is not a start symbol") - } - - automaton := &lr0Automaton{ - states: map[kernelID]*lrState{}, - } - - currentState := stateNumInitial - knownKernels := map[kernelID]struct{}{} - uncheckedKernels := []*kernel{} - - // Generate an initial kernel. - { - prods, _ := prods.findByLHS(startSym) - initialItem, err := newLR0Item(prods[0], 0) - if err != nil { - return nil, err - } - - k, err := newKernel([]*lrItem{initialItem}) - if err != nil { - return nil, err - } - - automaton.initialState = k.id - knownKernels[k.id] = struct{}{} - uncheckedKernels = append(uncheckedKernels, k) - } - - for len(uncheckedKernels) > 0 { - nextUncheckedKernels := []*kernel{} - for _, k := range uncheckedKernels { - state, neighbours, err := genStateAndNeighbourKernels(k, prods, errSym) - if err != nil { - return nil, err - } - state.num = currentState - currentState = currentState.next() - - automaton.states[state.id] = state - - for _, k := range neighbours { - if _, known := knownKernels[k.id]; known { - continue - } - knownKernels[k.id] = struct{}{} - nextUncheckedKernels = append(nextUncheckedKernels, k) - } - } - uncheckedKernels = nextUncheckedKernels - } - - return automaton, nil -} - -func genStateAndNeighbourKernels(k *kernel, prods *productionSet, errSym symbol.Symbol) (*lrState, []*kernel, error) { - items, err := genLR0Closure(k, prods) - if err != nil { - return nil, nil, err - } - neighbours, err := genNeighbourKernels(items, prods) - if err != nil { - return nil, nil, err - } - - next := map[symbol.Symbol]kernelID{} - kernels := []*kernel{} - for _, n := range neighbours { - next[n.symbol] = n.kernel.id - kernels = append(kernels, n.kernel) - } - - reducible := map[productionID]struct{}{} - var emptyProdItems []*lrItem - isErrorTrapper := false - for _, item := range 
items { - if item.dottedSymbol == errSym { - isErrorTrapper = true - } - - if item.reducible { - reducible[item.prod] = struct{}{} - - prod, ok := prods.findByID(item.prod) - if !ok { - return nil, nil, fmt.Errorf("reducible production not found: %v", item.prod) - } - if prod.isEmpty() { - emptyProdItems = append(emptyProdItems, item) - } - } - } - - return &lrState{ - kernel: k, - next: next, - reducible: reducible, - emptyProdItems: emptyProdItems, - isErrorTrapper: isErrorTrapper, - }, kernels, nil -} - -func genLR0Closure(k *kernel, prods *productionSet) ([]*lrItem, error) { - items := []*lrItem{} - knownItems := map[lrItemID]struct{}{} - uncheckedItems := []*lrItem{} - for _, item := range k.items { - items = append(items, item) - uncheckedItems = append(uncheckedItems, item) - } - for len(uncheckedItems) > 0 { - nextUncheckedItems := []*lrItem{} - for _, item := range uncheckedItems { - if item.dottedSymbol.IsTerminal() { - continue - } - - ps, _ := prods.findByLHS(item.dottedSymbol) - for _, prod := range ps { - item, err := newLR0Item(prod, 0) - if err != nil { - return nil, err - } - if _, exist := knownItems[item.id]; exist { - continue - } - items = append(items, item) - knownItems[item.id] = struct{}{} - nextUncheckedItems = append(nextUncheckedItems, item) - } - } - uncheckedItems = nextUncheckedItems - } - - return items, nil -} - -type neighbourKernel struct { - symbol symbol.Symbol - kernel *kernel -} - -func genNeighbourKernels(items []*lrItem, prods *productionSet) ([]*neighbourKernel, error) { - kItemMap := map[symbol.Symbol][]*lrItem{} - for _, item := range items { - if item.dottedSymbol.IsNil() { - continue - } - prod, ok := prods.findByID(item.prod) - if !ok { - return nil, fmt.Errorf("a production was not found: %v", item.prod) - } - kItem, err := newLR0Item(prod, item.dot+1) - if err != nil { - return nil, err - } - kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem) - } - - nextSyms := []symbol.Symbol{} - for sym := 
range kItemMap { - nextSyms = append(nextSyms, sym) - } - sort.Slice(nextSyms, func(i, j int) bool { - return nextSyms[i] < nextSyms[j] - }) - - kernels := []*neighbourKernel{} - for _, sym := range nextSyms { - k, err := newKernel(kItemMap[sym]) - if err != nil { - return nil, err - } - kernels = append(kernels, &neighbourKernel{ - symbol: sym, - kernel: k, - }) - } - - return kernels, nil -} diff --git a/grammar/lr0_test.go b/grammar/lr0_test.go deleted file mode 100644 index 798c2fa..0000000 --- a/grammar/lr0_test.go +++ /dev/null @@ -1,448 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "grammar/symbol" - "spec/grammar/parser" -) - -type expectedLRState struct { - kernelItems []*lrItem - nextStates map[symbol.Symbol][]*lrItem - reducibleProds []*production - emptyProdItems []*lrItem -} - -func TestGenLR0Automaton(t *testing.T) { - src := ` -#name test; - -expr - : expr add term - | term - ; -term - : term mul factor - | factor - ; -factor - : l_paren expr r_paren - | id - ; -add: "\+"; -mul: "\*"; -l_paren: "\("; -r_paren: "\)"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { 
- genLR0Item("expr'", 0, "expr"), - }, - 1: { - genLR0Item("expr'", 1, "expr"), - genLR0Item("expr", 1, "expr", "add", "term"), - }, - 2: { - genLR0Item("expr", 1, "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 3: { - genLR0Item("term", 1, "factor"), - }, - 4: { - genLR0Item("factor", 1, "l_paren", "expr", "r_paren"), - }, - 5: { - genLR0Item("factor", 1, "id"), - }, - 6: { - genLR0Item("expr", 2, "expr", "add", "term"), - }, - 7: { - genLR0Item("term", 2, "term", "mul", "factor"), - }, - 8: { - genLR0Item("expr", 1, "expr", "add", "term"), - genLR0Item("factor", 2, "l_paren", "expr", "r_paren"), - }, - 9: { - genLR0Item("expr", 3, "expr", "add", "term"), - genLR0Item("term", 1, "term", "mul", "factor"), - }, - 10: { - genLR0Item("term", 3, "term", "mul", "factor"), - }, - 11: { - genLR0Item("factor", 3, "l_paren", "expr", "r_paren"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[1], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - }, - reducibleProds: []*production{ - genProd("expr'", "expr"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "term"), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "factor"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("expr"): expectedKernels[8], - genSym("term"): expectedKernels[2], - genSym("factor"): expectedKernels[3], - 
genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[5], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "id"), - }, - }, - { - kernelItems: expectedKernels[6], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("term"): expectedKernels[9], - genSym("factor"): expectedKernels[3], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[7], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("factor"): expectedKernels[10], - genSym("l_paren"): expectedKernels[4], - genSym("id"): expectedKernels[5], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[8], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("add"): expectedKernels[6], - genSym("r_paren"): expectedKernels[11], - }, - reducibleProds: []*production{}, - }, - { - kernelItems: expectedKernels[9], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("mul"): expectedKernels[7], - }, - reducibleProds: []*production{ - genProd("expr", "expr", "add", "term"), - }, - }, - { - kernelItems: expectedKernels[10], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("term", "term", "mul", "factor"), - }, - }, - { - kernelItems: expectedKernels[11], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("factor", "l_paren", "expr", "r_paren"), - }, - }, - } - - testLRAutomaton(t, expectedStates, automaton) -} - -func TestLR0AutomatonContainingEmptyProduction(t *testing.T) { - src := ` -#name test; - -s - : foo bar - ; -foo - : - ; -bar - : b - | - ; - -b: "bar"; -` - - var gram *Grammar - var automaton *lr0Automaton - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - - b := GrammarBuilder{ - AST: ast, - } - gram, err = b.build() - if 
err != nil { - t.Fatal(err) - } - - automaton, err = genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatalf("failed to create a LR0 automaton: %v", err) - } - if automaton == nil { - t.Fatalf("genLR0Automaton returns nil without any error") - } - } - - initialState := automaton.states[automaton.initialState] - if initialState == nil { - t.Errorf("failed to get an initial status: %v", automaton.initialState) - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - genLR0Item("s'", 0, "s"), - }, - 1: { - genLR0Item("s'", 1, "s"), - }, - 2: { - genLR0Item("s", 1, "foo", "bar"), - }, - 3: { - genLR0Item("s", 2, "foo", "bar"), - }, - 4: { - genLR0Item("bar", 1, "b"), - }, - } - - expectedStates := []*expectedLRState{ - { - kernelItems: expectedKernels[0], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("foo"): expectedKernels[2], - }, - reducibleProds: []*production{ - genProd("foo"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("foo", 0), - }, - }, - { - kernelItems: expectedKernels[1], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s'", "s"), - }, - }, - { - kernelItems: expectedKernels[2], - nextStates: map[symbol.Symbol][]*lrItem{ - genSym("bar"): expectedKernels[3], - genSym("b"): expectedKernels[4], - }, - reducibleProds: []*production{ - genProd("bar"), - }, - emptyProdItems: []*lrItem{ - genLR0Item("bar", 0), - }, - }, - { - kernelItems: expectedKernels[3], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("s", "foo", "bar"), - }, - }, - { - kernelItems: expectedKernels[4], - nextStates: map[symbol.Symbol][]*lrItem{}, - reducibleProds: []*production{ - genProd("bar", "b"), - }, - }, - } - - testLRAutomaton(t, expectedStates, 
automaton) -} - -func testLRAutomaton(t *testing.T, expected []*expectedLRState, automaton *lr0Automaton) { - if len(automaton.states) != len(expected) { - t.Errorf("state count is mismatched; want: %v, got: %v", len(expected), len(automaton.states)) - } - - for i, eState := range expected { - t.Run(fmt.Sprintf("state #%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("a kernel was not found: %v", k.id) - } - - // test look-ahead symbols - { - if len(state.kernel.items) != len(eState.kernelItems) { - t.Errorf("kernels is mismatched; want: %v, got: %v", len(eState.kernelItems), len(state.kernel.items)) - } - for _, eKItem := range eState.kernelItems { - var kItem *lrItem - for _, it := range state.kernel.items { - if it.id != eKItem.id { - continue - } - kItem = it - break - } - if kItem == nil { - t.Fatalf("kernel item not found; want: %v, got: %v", eKItem.id, kItem.id) - } - - if len(kItem.lookAhead.symbols) != len(eKItem.lookAhead.symbols) { - t.Errorf("look-ahead symbols are mismatched; want: %v symbols, got: %v symbols", len(eKItem.lookAhead.symbols), len(kItem.lookAhead.symbols)) - } - - for eSym := range eKItem.lookAhead.symbols { - if _, ok := kItem.lookAhead.symbols[eSym]; !ok { - t.Errorf("look-ahead symbol not found: %v", eSym) - } - } - } - } - - // test next states - { - if len(state.next) != len(eState.nextStates) { - t.Errorf("next state count is mismcthed; want: %v, got: %v", len(eState.nextStates), len(state.next)) - } - for eSym, eKItems := range eState.nextStates { - nextStateKernel, err := newKernel(eKItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - nextState, ok := state.next[eSym] - if !ok { - t.Fatalf("next state was not found; state: %v, symbol: %v (%v)", state.id, "expr", eSym) - } - if nextState != nextStateKernel.id { - t.Fatalf("a kernel ID of 
the next state is mismatched; want: %v, got: %v", nextStateKernel.id, nextState) - } - } - } - - // test reducible productions - { - if len(state.reducible) != len(eState.reducibleProds) { - t.Errorf("reducible production count is mismatched; want: %v, got: %v", len(eState.reducibleProds), len(state.reducible)) - } - for _, eProd := range eState.reducibleProds { - if _, ok := state.reducible[eProd.id]; !ok { - t.Errorf("reducible production was not found: %v", eProd.id) - } - } - - if len(state.emptyProdItems) != len(eState.emptyProdItems) { - t.Errorf("empty production item is mismatched; want: %v, got: %v", len(eState.emptyProdItems), len(state.emptyProdItems)) - } - for _, eItem := range eState.emptyProdItems { - found := false - for _, item := range state.emptyProdItems { - if item.id != eItem.id { - continue - } - found = true - break - } - if !found { - t.Errorf("empty production item not found: %v", eItem.id) - } - } - } - }) - } -} diff --git a/grammar/parsing_table.go b/grammar/parsing_table.go deleted file mode 100644 index fc520cc..0000000 --- a/grammar/parsing_table.go +++ /dev/null @@ -1,553 +0,0 @@ -package grammar - -import ( - "fmt" - "sort" - - "grammar/symbol" - spec "spec/grammar" -) - -type ActionType string - -const ( - ActionTypeShift = ActionType("shift") - ActionTypeReduce = ActionType("reduce") - ActionTypeError = ActionType("error") -) - -type actionEntry int - -const actionEntryEmpty = actionEntry(0) - -func newShiftActionEntry(state stateNum) actionEntry { - return actionEntry(state * -1) -} - -func newReduceActionEntry(prod productionNum) actionEntry { - return actionEntry(prod) -} - -func (e actionEntry) isEmpty() bool { - return e == actionEntryEmpty -} - -func (e actionEntry) describe() (ActionType, stateNum, productionNum) { - if e == actionEntryEmpty { - return ActionTypeError, stateNumInitial, productionNumNil - } - if e < 0 { - return ActionTypeShift, stateNum(e * -1), productionNumNil - } - return ActionTypeReduce, 
stateNumInitial, productionNum(e) -} - -type GoToType string - -const ( - GoToTypeRegistered = GoToType("registered") - GoToTypeError = GoToType("error") -) - -type goToEntry uint - -const goToEntryEmpty = goToEntry(0) - -func newGoToEntry(state stateNum) goToEntry { - return goToEntry(state) -} - -func (e goToEntry) describe() (GoToType, stateNum) { - if e == goToEntryEmpty { - return GoToTypeError, stateNumInitial - } - return GoToTypeRegistered, stateNum(e) -} - -type conflictResolutionMethod int - -func (m conflictResolutionMethod) Int() int { - return int(m) -} - -const ( - ResolvedByPrec conflictResolutionMethod = 1 - ResolvedByAssoc conflictResolutionMethod = 2 - ResolvedByShift conflictResolutionMethod = 3 - ResolvedByProdOrder conflictResolutionMethod = 4 -) - -type conflict interface { - conflict() -} - -type shiftReduceConflict struct { - state stateNum - sym symbol.Symbol - nextState stateNum - prodNum productionNum - resolvedBy conflictResolutionMethod -} - -func (c *shiftReduceConflict) conflict() { -} - -type reduceReduceConflict struct { - state stateNum - sym symbol.Symbol - prodNum1 productionNum - prodNum2 productionNum - resolvedBy conflictResolutionMethod -} - -func (c *reduceReduceConflict) conflict() { -} - -var ( - _ conflict = &shiftReduceConflict{} - _ conflict = &reduceReduceConflict{} -) - -type ParsingTable struct { - actionTable []actionEntry - goToTable []goToEntry - stateCount int - terminalCount int - nonTerminalCount int - - // errorTrapperStates's index means a state number, and when `errorTrapperStates[stateNum]` is `1`, - // the state has an item having the following form. The `α` and `β` can be empty. 
- // - // A → α・error β - errorTrapperStates []int - - InitialState stateNum -} - -func (t *ParsingTable) getAction(state stateNum, sym symbol.SymbolNum) (ActionType, stateNum, productionNum) { - pos := state.Int()*t.terminalCount + sym.Int() - return t.actionTable[pos].describe() -} - -func (t *ParsingTable) getGoTo(state stateNum, sym symbol.SymbolNum) (GoToType, stateNum) { - pos := state.Int()*t.nonTerminalCount + sym.Int() - return t.goToTable[pos].describe() -} - -func (t *ParsingTable) readAction(row int, col int) actionEntry { - return t.actionTable[row*t.terminalCount+col] -} - -func (t *ParsingTable) writeAction(row int, col int, act actionEntry) { - t.actionTable[row*t.terminalCount+col] = act -} - -func (t *ParsingTable) writeGoTo(state stateNum, sym symbol.Symbol, nextState stateNum) { - pos := state.Int()*t.nonTerminalCount + sym.Num().Int() - t.goToTable[pos] = newGoToEntry(nextState) -} - -type lrTableBuilder struct { - automaton *lr0Automaton - prods *productionSet - termCount int - nonTermCount int - symTab *symbol.SymbolTableReader - precAndAssoc *precAndAssoc - - conflicts []conflict -} - -func (b *lrTableBuilder) build() (*ParsingTable, error) { - var ptab *ParsingTable - { - initialState := b.automaton.states[b.automaton.initialState] - ptab = &ParsingTable{ - actionTable: make([]actionEntry, len(b.automaton.states)*b.termCount), - goToTable: make([]goToEntry, len(b.automaton.states)*b.nonTermCount), - stateCount: len(b.automaton.states), - terminalCount: b.termCount, - nonTerminalCount: b.nonTermCount, - errorTrapperStates: make([]int, len(b.automaton.states)), - InitialState: initialState.num, - } - } - - for _, state := range b.automaton.states { - if state.isErrorTrapper { - ptab.errorTrapperStates[state.num] = 1 - } - - for sym, kID := range state.next { - nextState := b.automaton.states[kID] - if sym.IsTerminal() { - b.writeShiftAction(ptab, state.num, sym, nextState.num) - } else { - ptab.writeGoTo(state.num, sym, nextState.num) - } - } 
- - for prodID := range state.reducible { - reducibleProd, ok := b.prods.findByID(prodID) - if !ok { - return nil, fmt.Errorf("reducible production not found: %v", prodID) - } - - var reducibleItem *lrItem - for _, item := range state.items { - if item.prod != reducibleProd.id { - continue - } - - reducibleItem = item - break - } - if reducibleItem == nil { - for _, item := range state.emptyProdItems { - if item.prod != reducibleProd.id { - continue - } - - reducibleItem = item - break - } - if reducibleItem == nil { - return nil, fmt.Errorf("reducible item not found; state: %v, production: %v", state.num, reducibleProd.num) - } - } - - for a := range reducibleItem.lookAhead.symbols { - b.writeReduceAction(ptab, state.num, a, reducibleProd.num) - } - } - } - - return ptab, nil -} - -// writeShiftAction writes a shift action to the parsing table. When a shift/reduce conflict occurred, -// we prioritize the shift action. -func (b *lrTableBuilder) writeShiftAction(tab *ParsingTable, state stateNum, sym symbol.Symbol, nextState stateNum) { - act := tab.readAction(state.Int(), sym.Num().Int()) - if !act.isEmpty() { - ty, _, p := act.describe() - if ty == ActionTypeReduce { - act, method := b.resolveSRConflict(sym.Num(), p) - b.conflicts = append(b.conflicts, &shiftReduceConflict{ - state: state, - sym: sym, - nextState: nextState, - prodNum: p, - resolvedBy: method, - }) - if act == ActionTypeShift { - tab.writeAction(state.Int(), sym.Num().Int(), newShiftActionEntry(nextState)) - } - return - } - } - tab.writeAction(state.Int(), sym.Num().Int(), newShiftActionEntry(nextState)) -} - -// writeReduceAction writes a reduce action to the parsing table. When a shift/reduce conflict occurred, -// we prioritize the shift action, and when a reduce/reduce conflict we prioritize the action that reduces -// the production with higher priority. Productions defined earlier in the grammar file have a higher priority. 
-func (b *lrTableBuilder) writeReduceAction(tab *ParsingTable, state stateNum, sym symbol.Symbol, prod productionNum) { - act := tab.readAction(state.Int(), sym.Num().Int()) - if !act.isEmpty() { - ty, s, p := act.describe() - switch ty { - case ActionTypeReduce: - if p == prod { - return - } - - b.conflicts = append(b.conflicts, &reduceReduceConflict{ - state: state, - sym: sym, - prodNum1: p, - prodNum2: prod, - resolvedBy: ResolvedByProdOrder, - }) - if p < prod { - tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(p)) - } else { - tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod)) - } - case ActionTypeShift: - act, method := b.resolveSRConflict(sym.Num(), prod) - b.conflicts = append(b.conflicts, &shiftReduceConflict{ - state: state, - sym: sym, - nextState: s, - prodNum: prod, - resolvedBy: method, - }) - if act == ActionTypeReduce { - tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod)) - } - } - return - } - tab.writeAction(state.Int(), sym.Num().Int(), newReduceActionEntry(prod)) -} - -func (b *lrTableBuilder) resolveSRConflict(sym symbol.SymbolNum, prod productionNum) (ActionType, conflictResolutionMethod) { - symPrec := b.precAndAssoc.terminalPrecedence(sym) - prodPrec := b.precAndAssoc.productionPredence(prod) - if symPrec == 0 || prodPrec == 0 { - return ActionTypeShift, ResolvedByShift - } - if symPrec == prodPrec { - assoc := b.precAndAssoc.productionAssociativity(prod) - if assoc != assocTypeLeft { - return ActionTypeShift, ResolvedByAssoc - } - return ActionTypeReduce, ResolvedByAssoc - } - if symPrec < prodPrec { - return ActionTypeShift, ResolvedByPrec - } - return ActionTypeReduce, ResolvedByPrec -} - -func (b *lrTableBuilder) genReport(tab *ParsingTable, gram *Grammar) (*spec.Report, error) { - var terms []*spec.Terminal - { - termSyms := b.symTab.TerminalSymbols() - terms = make([]*spec.Terminal, len(termSyms)+1) - - for _, sym := range termSyms { - name, ok := b.symTab.ToText(sym) - 
if !ok { - return nil, fmt.Errorf("failed to generate terminals: symbol not found: %v", sym) - } - - term := &spec.Terminal{ - Number: sym.Num().Int(), - Name: name, - } - - prec := b.precAndAssoc.terminalPrecedence(sym.Num()) - if prec != precNil { - term.Precedence = prec - } - - assoc := b.precAndAssoc.terminalAssociativity(sym.Num()) - switch assoc { - case assocTypeLeft: - term.Associativity = "l" - case assocTypeRight: - term.Associativity = "r" - } - - terms[sym.Num()] = term - } - } - - var nonTerms []*spec.NonTerminal - { - nonTermSyms := b.symTab.NonTerminalSymbols() - nonTerms = make([]*spec.NonTerminal, len(nonTermSyms)+1) - for _, sym := range nonTermSyms { - name, ok := b.symTab.ToText(sym) - if !ok { - return nil, fmt.Errorf("failed to generate non-terminals: symbol not found: %v", sym) - } - - nonTerms[sym.Num()] = &spec.NonTerminal{ - Number: sym.Num().Int(), - Name: name, - } - } - } - - var prods []*spec.Production - { - ps := gram.productionSet.getAllProductions() - prods = make([]*spec.Production, len(ps)+1) - for _, p := range ps { - rhs := make([]int, len(p.rhs)) - for i, e := range p.rhs { - if e.IsTerminal() { - rhs[i] = e.Num().Int() - } else { - rhs[i] = e.Num().Int() * -1 - } - } - - prod := &spec.Production{ - Number: p.num.Int(), - LHS: p.lhs.Num().Int(), - RHS: rhs, - } - - prec := b.precAndAssoc.productionPredence(p.num) - if prec != precNil { - prod.Precedence = prec - } - - assoc := b.precAndAssoc.productionAssociativity(p.num) - switch assoc { - case assocTypeLeft: - prod.Associativity = "l" - case assocTypeRight: - prod.Associativity = "r" - } - - prods[p.num.Int()] = prod - } - } - - var states []*spec.State - { - srConflicts := map[stateNum][]*shiftReduceConflict{} - rrConflicts := map[stateNum][]*reduceReduceConflict{} - for _, con := range b.conflicts { - switch c := con.(type) { - case *shiftReduceConflict: - srConflicts[c.state] = append(srConflicts[c.state], c) - case *reduceReduceConflict: - rrConflicts[c.state] = 
append(rrConflicts[c.state], c) - } - } - - states = make([]*spec.State, len(b.automaton.states)) - for _, s := range b.automaton.states { - kernel := make([]*spec.Item, len(s.items)) - for i, item := range s.items { - p, ok := b.prods.findByID(item.prod) - if !ok { - return nil, fmt.Errorf("failed to generate states: production of kernel item not found: %v", item.prod) - } - - kernel[i] = &spec.Item{ - Production: p.num.Int(), - Dot: item.dot, - } - } - - sort.Slice(kernel, func(i, j int) bool { - if kernel[i].Production < kernel[j].Production { - return true - } - if kernel[i].Production > kernel[j].Production { - return false - } - return kernel[i].Dot < kernel[j].Dot - }) - - var shift []*spec.Transition - var reduce []*spec.Reduce - var goTo []*spec.Transition - { - TERMINALS_LOOP: - for _, t := range b.symTab.TerminalSymbols() { - act, next, prod := tab.getAction(s.num, t.Num()) - switch act { - case ActionTypeShift: - shift = append(shift, &spec.Transition{ - Symbol: t.Num().Int(), - State: next.Int(), - }) - case ActionTypeReduce: - for _, r := range reduce { - if r.Production == prod.Int() { - r.LookAhead = append(r.LookAhead, t.Num().Int()) - continue TERMINALS_LOOP - } - } - reduce = append(reduce, &spec.Reduce{ - LookAhead: []int{t.Num().Int()}, - Production: prod.Int(), - }) - } - } - - for _, n := range b.symTab.NonTerminalSymbols() { - ty, next := tab.getGoTo(s.num, n.Num()) - if ty == GoToTypeRegistered { - goTo = append(goTo, &spec.Transition{ - Symbol: n.Num().Int(), - State: next.Int(), - }) - } - } - - sort.Slice(shift, func(i, j int) bool { - return shift[i].State < shift[j].State - }) - sort.Slice(reduce, func(i, j int) bool { - return reduce[i].Production < reduce[j].Production - }) - sort.Slice(goTo, func(i, j int) bool { - return goTo[i].State < goTo[j].State - }) - } - - sr := []*spec.SRConflict{} - rr := []*spec.RRConflict{} - { - for _, c := range srConflicts[s.num] { - conflict := &spec.SRConflict{ - Symbol: c.sym.Num().Int(), - State: 
c.nextState.Int(), - Production: c.prodNum.Int(), - ResolvedBy: c.resolvedBy.Int(), - } - - ty, s, p := tab.getAction(s.num, c.sym.Num()) - switch ty { - case ActionTypeShift: - n := s.Int() - conflict.AdoptedState = &n - case ActionTypeReduce: - n := p.Int() - conflict.AdoptedProduction = &n - } - - sr = append(sr, conflict) - } - - sort.Slice(sr, func(i, j int) bool { - return sr[i].Symbol < sr[j].Symbol - }) - - for _, c := range rrConflicts[s.num] { - conflict := &spec.RRConflict{ - Symbol: c.sym.Num().Int(), - Production1: c.prodNum1.Int(), - Production2: c.prodNum2.Int(), - ResolvedBy: c.resolvedBy.Int(), - } - - _, _, p := tab.getAction(s.num, c.sym.Num()) - conflict.AdoptedProduction = p.Int() - - rr = append(rr, conflict) - } - - sort.Slice(rr, func(i, j int) bool { - return rr[i].Symbol < rr[j].Symbol - }) - } - - states[s.num.Int()] = &spec.State{ - Number: s.num.Int(), - Kernel: kernel, - Shift: shift, - Reduce: reduce, - GoTo: goTo, - SRConflict: sr, - RRConflict: rr, - } - } - } - - return &spec.Report{ - Terminals: terms, - NonTerminals: nonTerms, - Productions: prods, - States: states, - }, nil -} diff --git a/grammar/parsing_table_test.go b/grammar/parsing_table_test.go deleted file mode 100644 index 098adf9..0000000 --- a/grammar/parsing_table_test.go +++ /dev/null @@ -1,387 +0,0 @@ -package grammar - -import ( - "fmt" - "strings" - "testing" - - "grammar/symbol" - "spec/grammar/parser" -) - -type expectedState struct { - kernelItems []*lrItem - acts map[symbol.Symbol]testActionEntry - goTos map[symbol.Symbol][]*lrItem -} - -func TestGenLALRParsingTable(t *testing.T) { - src := ` -#name test; - -s: l eq r | r; -l: ref r | id; -r: l; -eq: '='; -ref: '*'; -id: "[A-Za-z0-9_]+"; -` - - var ptab *ParsingTable - var automaton *lalr1Automaton - var gram *Grammar - var nonTermCount int - var termCount int - { - ast, err := parser.Parse(strings.NewReader(src)) - if err != nil { - t.Fatal(err) - } - b := GrammarBuilder{ - AST: ast, - } - gram, err = 
b.build() - if err != nil { - t.Fatal(err) - } - first, err := genFirstSet(gram.productionSet) - if err != nil { - t.Fatal(err) - } - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - t.Fatal(err) - } - automaton, err = genLALR1Automaton(lr0, gram.productionSet, first) - if err != nil { - t.Fatal(err) - } - - nonTermTexts, err := gram.symbolTable.NonTerminalTexts() - if err != nil { - t.Fatal(err) - } - termTexts, err := gram.symbolTable.TerminalTexts() - if err != nil { - t.Fatal(err) - } - nonTermCount = len(nonTermTexts) - termCount = len(termTexts) - - lalr := &lrTableBuilder{ - automaton: automaton.lr0Automaton, - prods: gram.productionSet, - termCount: termCount, - nonTermCount: nonTermCount, - symTab: gram.symbolTable, - } - ptab, err = lalr.build() - if err != nil { - t.Fatalf("failed to create a LALR parsing table: %v", err) - } - if ptab == nil { - t.Fatal("genLALRParsingTable returns nil without any error") - } - } - - genSym := newTestSymbolGenerator(t, gram.symbolTable) - genProd := newTestProductionGenerator(t, genSym) - genLR0Item := newTestLR0ItemGenerator(t, genProd) - - expectedKernels := map[int][]*lrItem{ - 0: { - withLookAhead(genLR0Item("s'", 0, "s"), symbol.SymbolEOF), - }, - 1: { - withLookAhead(genLR0Item("s'", 1, "s"), symbol.SymbolEOF), - }, - 2: { - withLookAhead(genLR0Item("s", 1, "l", "eq", "r"), symbol.SymbolEOF), - withLookAhead(genLR0Item("r", 1, "l"), symbol.SymbolEOF), - }, - 3: { - withLookAhead(genLR0Item("s", 1, "r"), symbol.SymbolEOF), - }, - 4: { - withLookAhead(genLR0Item("l", 1, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 5: { - withLookAhead(genLR0Item("l", 1, "id"), genSym("eq"), symbol.SymbolEOF), - }, - 6: { - withLookAhead(genLR0Item("s", 2, "l", "eq", "r"), symbol.SymbolEOF), - }, - 7: { - withLookAhead(genLR0Item("l", 2, "ref", "r"), genSym("eq"), symbol.SymbolEOF), - }, - 8: { - withLookAhead(genLR0Item("r", 1, "l"), genSym("eq"), 
symbol.SymbolEOF), - }, - 9: { - withLookAhead(genLR0Item("s", 3, "l", "eq", "r"), symbol.SymbolEOF), - }, - } - - expectedStates := []expectedState{ - { - kernelItems: expectedKernels[0], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("s"): expectedKernels[1], - genSym("l"): expectedKernels[2], - genSym("r"): expectedKernels[3], - }, - }, - { - kernelItems: expectedKernels[1], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s'", "s"), - }, - }, - }, - { - kernelItems: expectedKernels[2], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeShift, - nextState: expectedKernels[6], - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[3], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[4], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("r"): expectedKernels[7], - genSym("l"): expectedKernels[8], - }, - }, - { - kernelItems: expectedKernels[5], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "id"), - }, - }, - }, - { - kernelItems: expectedKernels[6], - acts: map[symbol.Symbol]testActionEntry{ - genSym("ref"): { - ty: ActionTypeShift, - nextState: expectedKernels[4], - }, - genSym("id"): { - ty: ActionTypeShift, - nextState: 
expectedKernels[5], - }, - }, - goTos: map[symbol.Symbol][]*lrItem{ - genSym("l"): expectedKernels[8], - genSym("r"): expectedKernels[9], - }, - }, - { - kernelItems: expectedKernels[7], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("l", "ref", "r"), - }, - }, - }, - { - kernelItems: expectedKernels[8], - acts: map[symbol.Symbol]testActionEntry{ - genSym("eq"): { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("r", "l"), - }, - }, - }, - { - kernelItems: expectedKernels[9], - acts: map[symbol.Symbol]testActionEntry{ - symbol.SymbolEOF: { - ty: ActionTypeReduce, - production: genProd("s", "l", "eq", "r"), - }, - }, - }, - } - - t.Run("initial state", func(t *testing.T) { - iniState := findStateByNum(automaton.states, ptab.InitialState) - if iniState == nil { - t.Fatalf("the initial state was not found: #%v", ptab.InitialState) - } - eIniState, err := newKernel(expectedKernels[0]) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - if iniState.id != eIniState.id { - t.Fatalf("the initial state is mismatched; want: %v, got: %v", eIniState.id, iniState.id) - } - }) - - for i, eState := range expectedStates { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - k, err := newKernel(eState.kernelItems) - if err != nil { - t.Fatalf("failed to create a kernel item: %v", err) - } - state, ok := automaton.states[k.id] - if !ok { - t.Fatalf("state was not found: #%v", 0) - } - - testAction(t, &eState, state, ptab, automaton.lr0Automaton, gram, termCount) - testGoTo(t, &eState, state, ptab, automaton.lr0Automaton, nonTermCount) - }) - } -} - -func testAction(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, gram *Grammar, termCount int) { - nonEmptyEntries := 
map[symbol.SymbolNum]struct{}{} - for eSym, eAct := range expectedState.acts { - nonEmptyEntries[eSym.Num()] = struct{}{} - - ty, stateNum, prodNum := ptab.getAction(state.num, eSym.Num()) - if ty != eAct.ty { - t.Fatalf("action type is mismatched; want: %v, got: %v", eAct.ty, ty) - } - switch eAct.ty { - case ActionTypeShift: - eNextState, err := newKernel(eAct.nextState) - if err != nil { - t.Fatal(err) - } - nextState := findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found; state: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - case ActionTypeReduce: - prod := findProductionByNum(gram.productionSet, prodNum) - if prod == nil { - t.Fatalf("production was not found: #%v", prodNum) - } - if prod.id != eAct.production.id { - t.Fatalf("production is mismatched; symbol: %v, want: %v, got: %v", eSym, eAct.production.id, prod.id) - } - } - } - for symNum := 0; symNum < termCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, stateNum, prodNum := ptab.getAction(state.num, symbol.SymbolNum(symNum)) - if ty != ActionTypeError { - t.Errorf("unexpected ACTION entry; state: #%v, symbol: #%v, action type: %v, next state: #%v, prodction: #%v", state.num, symNum, ty, stateNum, prodNum) - } - } -} - -func testGoTo(t *testing.T, expectedState *expectedState, state *lrState, ptab *ParsingTable, automaton *lr0Automaton, nonTermCount int) { - nonEmptyEntries := map[symbol.SymbolNum]struct{}{} - for eSym, eGoTo := range expectedState.goTos { - nonEmptyEntries[eSym.Num()] = struct{}{} - - eNextState, err := newKernel(eGoTo) - if err != nil { - t.Fatal(err) - } - ty, stateNum := ptab.getGoTo(state.num, eSym.Num()) - if ty != GoToTypeRegistered { - t.Fatalf("GOTO entry was not found; state: #%v, symbol: #%v", state.num, eSym) - } - nextState := 
findStateByNum(automaton.states, stateNum) - if nextState == nil { - t.Fatalf("state was not found: #%v", stateNum) - } - if nextState.id != eNextState.id { - t.Fatalf("next state is mismatched; symbol: %v, want: %v, got: %v", eSym, eNextState.id, nextState.id) - } - } - for symNum := 0; symNum < nonTermCount; symNum++ { - if _, checked := nonEmptyEntries[symbol.SymbolNum(symNum)]; checked { - continue - } - ty, _ := ptab.getGoTo(state.num, symbol.SymbolNum(symNum)) - if ty != GoToTypeError { - t.Errorf("unexpected GOTO entry; state: #%v, symbol: #%v", state.num, symNum) - } - } -} - -type testActionEntry struct { - ty ActionType - nextState []*lrItem - production *production -} - -func findStateByNum(states map[kernelID]*lrState, num stateNum) *lrState { - for _, state := range states { - if state.num == num { - return state - } - } - return nil -} - -func findProductionByNum(prods *productionSet, num productionNum) *production { - for _, prod := range prods.getAllProductions() { - if prod.num == num { - return prod - } - } - return nil -} diff --git a/grammar/production.go b/grammar/production.go deleted file mode 100644 index 1742dc6..0000000 --- a/grammar/production.go +++ /dev/null @@ -1,117 +0,0 @@ -package grammar - -import ( - "crypto/sha256" - "encoding/hex" - "fmt" - - "grammar/symbol" -) - -type productionID [32]byte - -func (id productionID) String() string { - return hex.EncodeToString(id[:]) -} - -func genProductionID(lhs symbol.Symbol, rhs []symbol.Symbol) productionID { - seq := lhs.Byte() - for _, sym := range rhs { - seq = append(seq, sym.Byte()...) 
- } - return productionID(sha256.Sum256(seq)) -} - -type productionNum uint16 - -const ( - productionNumNil = productionNum(0) - productionNumStart = productionNum(1) - productionNumMin = productionNum(2) -) - -func (n productionNum) Int() int { - return int(n) -} - -type production struct { - id productionID - num productionNum - lhs symbol.Symbol - rhs []symbol.Symbol - rhsLen int -} - -func newProduction(lhs symbol.Symbol, rhs []symbol.Symbol) (*production, error) { - if lhs.IsNil() { - return nil, fmt.Errorf("LHS must be a non-nil symbol; LHS: %v, RHS: %v", lhs, rhs) - } - for _, sym := range rhs { - if sym.IsNil() { - return nil, fmt.Errorf("a symbol of RHS must be a non-nil symbol; LHS: %v, RHS: %v", lhs, rhs) - } - } - - return &production{ - id: genProductionID(lhs, rhs), - lhs: lhs, - rhs: rhs, - rhsLen: len(rhs), - }, nil -} - -func (p *production) isEmpty() bool { - return p.rhsLen == 0 -} - -type productionSet struct { - lhs2Prods map[symbol.Symbol][]*production - id2Prod map[productionID]*production - num productionNum -} - -func newProductionSet() *productionSet { - return &productionSet{ - lhs2Prods: map[symbol.Symbol][]*production{}, - id2Prod: map[productionID]*production{}, - num: productionNumMin, - } -} - -func (ps *productionSet) append(prod *production) { - if _, ok := ps.id2Prod[prod.id]; ok { - return - } - - if prod.lhs.IsStart() { - prod.num = productionNumStart - } else { - prod.num = ps.num - ps.num++ - } - - if prods, ok := ps.lhs2Prods[prod.lhs]; ok { - ps.lhs2Prods[prod.lhs] = append(prods, prod) - } else { - ps.lhs2Prods[prod.lhs] = []*production{prod} - } - ps.id2Prod[prod.id] = prod -} - -func (ps *productionSet) findByID(id productionID) (*production, bool) { - prod, ok := ps.id2Prod[id] - return prod, ok -} - -func (ps *productionSet) findByLHS(lhs symbol.Symbol) ([]*production, bool) { - if lhs.IsNil() { - return nil, false - } - - prods, ok := ps.lhs2Prods[lhs] - return prods, ok -} - -func (ps *productionSet) 
getAllProductions() map[productionID]*production {
-	return ps.id2Prod
-}
diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go
deleted file mode 100644
index 88a6b17..0000000
--- a/grammar/semantic_error.go
+++ /dev/null
@@ -1,30 +0,0 @@
-package grammar
-
-import "errors"
-
-// Sentinel errors for semantic problems in a grammar definition. Each value is
-// created once with errors.New, so it can be compared by identity.
-var (
-	semErrNoGrammarName         = errors.New("name is missing")
-	semErrSpellingInconsistency = errors.New("the identifiers are treated as the same. please use the same spelling")
-	semErrDuplicateAssoc        = errors.New("associativity and precedence cannot be specified multiple times for a symbol")
-	semErrUndefinedPrec         = errors.New("symbol must have precedence")
-	semErrUndefinedOrdSym       = errors.New("undefined ordered symbol")
-	semErrUnusedProduction      = errors.New("unused production")
-	semErrUnusedTerminal        = errors.New("unused terminal")
-	semErrTermCannotBeSkipped   = errors.New("a terminal used in productions cannot be skipped")
-	semErrNoProduction          = errors.New("a grammar needs at least one production")
-	semErrUndefinedSym          = errors.New("undefined symbol")
-	semErrDuplicateProduction   = errors.New("duplicate production")
-	semErrDuplicateTerminal     = errors.New("duplicate terminal")
-	semErrDuplicateFragment     = errors.New("duplicate fragment")
-	semErrDuplicateName         = errors.New("duplicate names are not allowed between terminals and non-terminals")
-	semErrErrSymIsReserved      = errors.New("symbol 'error' is reserved as a terminal symbol")
-	semErrDuplicateLabel        = errors.New("a label must be unique in an alternative")
-	semErrInvalidLabel          = errors.New("a label must differ from terminal symbols or non-terminal symbols")
-	semErrDirInvalidName        = errors.New("invalid directive name")
-	semErrDirInvalidParam       = errors.New("invalid parameter")
-	semErrDuplicateDir          = errors.New("a directive must not be duplicated")
-	semErrDuplicateElem         = errors.New("duplicate element")
-	semErrAmbiguousElem         = errors.New("ambiguous element")
-	semErrInvalidProdDir        = errors.New("invalid production directive")
-	semErrInvalidAltDir         = errors.New("invalid alternative directive")
-)
diff --git a/grammar/symbol/symbol.go b/grammar/symbol/symbol.go
deleted file mode 100644
index f9e6a93..0000000
--- a/grammar/symbol/symbol.go
+++ /dev/null
@@ -1,295 +0,0 @@
-package symbol
-
-import (
-	"fmt"
-	"sort"
-)
-
-// symbolKind names the kind of a symbol: terminal or non-terminal.
-type symbolKind string
-
-const (
-	symbolKindNonTerminal = symbolKind("non-terminal")
-	symbolKindTerminal    = symbolKind("terminal")
-)
-
-// String returns the kind as plain text.
-func (t symbolKind) String() string {
-	return string(t)
-}
-
-// SymbolNum is the number part of a Symbol (the low 14 bits).
-type SymbolNum uint16
-
-// Int returns the number as an int.
-func (n SymbolNum) Int() int {
-	return int(n)
-}
-
-// Symbol packs a kind bit (terminal or non-terminal), a start/EOF bit, and a
-// 14-bit number into a single uint16. See the mask constants below for the
-// exact bit layout.
-type Symbol uint16
-
-// String renders the symbol as a one-letter prefix followed by its number:
-// "s" for the start symbol, "e" for EOF, "n" for other non-terminals, and "t"
-// for other terminals.
-func (s Symbol) String() string {
-	kind, isStart, isEOF, num := s.describe()
-	var prefix string
-	switch {
-	case isStart:
-		prefix = "s"
-	case isEOF:
-		prefix = "e"
-	case kind == symbolKindNonTerminal:
-		prefix = "n"
-	case kind == symbolKindTerminal:
-		prefix = "t"
-	default:
-		prefix = "?"
-	}
-	return fmt.Sprintf("%v%v", prefix, num)
-}
-
-const (
-	maskKindPart    = uint16(0x8000) // 1000 0000 0000 0000
-	maskNonTerminal = uint16(0x0000) // 0000 0000 0000 0000
-	maskTerminal    = uint16(0x8000) // 1000 0000 0000 0000
-
-	maskSubKindpart    = uint16(0x4000) // 0100 0000 0000 0000
-	maskNonStartAndEOF = uint16(0x0000) // 0000 0000 0000 0000
-	maskStartOrEOF     = uint16(0x4000) // 0100 0000 0000 0000
-
-	maskNumberPart = uint16(0x3fff) // 0011 1111 1111 1111
-
-	symbolNumStart = uint16(0x0001) // 0000 0000 0000 0001
-	symbolNumEOF   = uint16(0x0001) // 0000 0000 0000 0001
-
-	SymbolNil   = Symbol(0)                                                 // 0000 0000 0000 0000
-	symbolStart = Symbol(maskNonTerminal | maskStartOrEOF | symbolNumStart) // 0100 0000 0000 0001
-	SymbolEOF   = Symbol(maskTerminal | maskStartOrEOF | symbolNumEOF)      // 1100 0000 0000 0001: The EOF symbol is treated as a terminal symbol.
-
-	// The symbol name contains `<` and `>` to avoid conflicting with user-defined symbols.
-	symbolNameEOF = "<eof>"
-
-	nonTerminalNumMin = SymbolNum(2)           // The number 1 is used by a start symbol.
-	terminalNumMin    = SymbolNum(2)           // The number 1 is used by the EOF symbol.
-	symbolNumMax      = SymbolNum(0xffff) >> 2 // 0011 1111 1111 1111
-)
-
-// newSymbol builds a Symbol from its kind, start flag, and number. It returns
-// an error when num exceeds symbolNumMax or when a terminal is flagged as the
-// start symbol.
-func newSymbol(kind symbolKind, isStart bool, num SymbolNum) (Symbol, error) {
-	if num > symbolNumMax {
-		return SymbolNil, fmt.Errorf("a symbol number exceeds the limit; limit: %v, passed: %v", symbolNumMax, num)
-	}
-	if kind == symbolKindTerminal && isStart {
-		return SymbolNil, fmt.Errorf("a start symbol must be a non-terminal symbol")
-	}
-
-	kindMask := maskNonTerminal
-	if kind == symbolKindTerminal {
-		kindMask = maskTerminal
-	}
-	startMask := maskNonStartAndEOF
-	if isStart {
-		startMask = maskStartOrEOF
-	}
-	return Symbol(kindMask | startMask | uint16(num)), nil
-}
-
-// Num returns the number part of the symbol.
-func (s Symbol) Num() SymbolNum {
-	_, _, _, num := s.describe()
-	return num
-}
-
-// Byte returns the symbol as two bytes, high byte first. A nil symbol (number
-// part zero) is always encoded as {0, 0}, regardless of its kind bits.
-func (s Symbol) Byte() []byte {
-	if s.IsNil() {
-		return []byte{0, 0}
-	}
-	return []byte{byte(uint16(s) >> 8), byte(uint16(s) & 0x00ff)}
-}
-
-// IsNil reports whether the number part of the symbol is zero.
-func (s Symbol) IsNil() bool {
-	_, _, _, num := s.describe()
-	return num == 0
-}
-
-// IsStart reports whether the symbol is a non-nil start symbol.
-func (s Symbol) IsStart() bool {
-	if s.IsNil() {
-		return false
-	}
-	_, isStart, _, _ := s.describe()
-	return isStart
-}
-
-// isEOF reports whether the symbol is a non-nil EOF symbol.
-func (s Symbol) isEOF() bool {
-	if s.IsNil() {
-		return false
-	}
-	_, _, isEOF, _ := s.describe()
-	return isEOF
-}
-
-// isNonTerminal reports whether the symbol is a non-nil non-terminal.
-func (s Symbol) isNonTerminal() bool {
-	if s.IsNil() {
-		return false
-	}
-	kind, _, _, _ := s.describe()
-	return kind == symbolKindNonTerminal
-}
-
-// IsTerminal reports whether the symbol is a non-nil terminal.
-func (s Symbol) IsTerminal() bool {
-	if s.IsNil() {
-		return false
-	}
-	return !s.isNonTerminal()
-}
-
-// describe decodes the symbol into its kind, start flag, EOF flag, and number.
-// The sub-kind bit means "start" on a non-terminal and "EOF" on a terminal.
-func (s Symbol) describe() (symbolKind, bool, bool, SymbolNum) {
-	kind := symbolKindNonTerminal
-	if uint16(s)&maskKindPart > 0 {
-		kind = symbolKindTerminal
-	}
-	isStart := false
-	isEOF := false
-	if uint16(s)&maskSubKindpart > 0 {
-		if kind == symbolKindNonTerminal {
-			isStart = true
-		} else {
-			isEOF = true
-		}
-	}
-	num := SymbolNum(uint16(s) & maskNumberPart)
-	return kind, isStart, isEOF, num
-}
-
-// SymbolTable maps symbol texts to Symbols and back. It also keeps the texts
-// of terminals and non-terminals in slices indexed by symbol number, and the
-// next number to assign for each kind.
-type SymbolTable struct {
-	text2Sym     map[string]Symbol
-	sym2Text     map[Symbol]string
-	nonTermTexts []string
-	termTexts    []string
-	nonTermNum   SymbolNum
-	termNum      SymbolNum
-}
-
-// SymbolTableWriter is the registering view of a SymbolTable.
-type SymbolTableWriter struct {
-	*SymbolTable
-}
-
-// SymbolTableReader is the lookup view of a SymbolTable.
-type SymbolTableReader struct {
-	*SymbolTable
-}
-
-// NewSymbolTable returns a table that already contains the EOF terminal. Index
-// 0 of both text slices is reserved for the nil symbol; index 1 holds the EOF
-// text for terminals and an empty placeholder for the start symbol.
-func NewSymbolTable() *SymbolTable {
-	return &SymbolTable{
-		text2Sym: map[string]Symbol{
-			symbolNameEOF: SymbolEOF,
-		},
-		sym2Text: map[Symbol]string{
-			SymbolEOF: symbolNameEOF,
-		},
-		termTexts: []string{
-			"",            // Nil
-			symbolNameEOF, // EOF
-		},
-		nonTermTexts: []string{
-			"", // Nil
-			"", // Start Symbol
-		},
-		nonTermNum: nonTerminalNumMin,
-		termNum:    terminalNumMin,
-	}
-}
-
-// Writer returns a writer that shares this table.
-func (t *SymbolTable) Writer() *SymbolTableWriter {
-	return &SymbolTableWriter{
-		SymbolTable: t,
-	}
-}
-
-// Reader returns a reader that shares this table.
-func (t *SymbolTable) Reader() *SymbolTableReader {
-	return &SymbolTableReader{
-		SymbolTable: t,
-	}
-}
-
-// RegisterStartSymbol binds text to the start symbol, overwriting any text
-// previously recorded for it. The returned error is always nil.
-func (w *SymbolTableWriter) RegisterStartSymbol(text string) (Symbol, error) {
-	w.text2Sym[text] = symbolStart
-	w.sym2Text[symbolStart] = text
-	w.nonTermTexts[symbolStart.Num().Int()] = text
-	return symbolStart, nil
-}
-
-// RegisterNonTerminalSymbol returns the symbol already registered under text,
-// whatever its kind, or assigns the next non-terminal number to it. It fails
-// when the number space is exhausted.
-func (w *SymbolTableWriter) RegisterNonTerminalSymbol(text string) (Symbol, error) {
-	if sym, ok := w.text2Sym[text]; ok {
-		return sym, nil
-	}
-	sym, err := newSymbol(symbolKindNonTerminal, false, w.nonTermNum)
-	if err != nil {
-		return SymbolNil, err
-	}
-	w.nonTermNum++
-	w.text2Sym[text] = sym
-	w.sym2Text[sym] = text
-	w.nonTermTexts = append(w.nonTermTexts, text)
-	return sym, nil
-}
-
-// RegisterTerminalSymbol returns the symbol already registered under text,
-// whatever its kind, or assigns the next terminal number to it. It fails when
-// the number space is exhausted.
-func (w *SymbolTableWriter) RegisterTerminalSymbol(text string) (Symbol, error) {
-	if sym, ok := w.text2Sym[text]; ok {
-		return sym, nil
-	}
-	sym, err := newSymbol(symbolKindTerminal, false, w.termNum)
-	if err != nil {
-		return SymbolNil, err
-	}
-	w.termNum++
-	w.text2Sym[text] = sym
-	w.sym2Text[sym] = text
-	w.termTexts = append(w.termTexts, text)
-	return sym, nil
-}
-
-// ToSymbol looks up the symbol registered under text.
-func (r *SymbolTableReader) ToSymbol(text string) (Symbol, bool) {
-	if sym, ok := r.text2Sym[text]; ok {
-		return sym, true
-	}
-	return SymbolNil, false
-}
-
-// ToText looks up the text registered for sym.
-func (r *SymbolTableReader) ToText(sym Symbol) (string, bool) {
-	text, ok := r.sym2Text[sym]
-	return text, ok
-}
-
-// TerminalSymbols returns every registered terminal, EOF included, sorted by
-// numeric value.
-func (r *SymbolTableReader) TerminalSymbols() []Symbol {
-	syms := make([]Symbol, 0, r.termNum.Int()-terminalNumMin.Int())
-	for sym := range r.sym2Text {
-		if !sym.IsTerminal() || sym.IsNil() {
-			continue
-		}
-		syms = append(syms, sym)
-	}
-	sort.Slice(syms, func(i, j int) bool {
-		return syms[i] < syms[j]
-	})
-	return syms
-}
-
-// TerminalTexts returns the terminal texts indexed by symbol number. The
-// table's own slice is returned, not a copy. It fails when no terminal has
-// been registered.
-func (r *SymbolTableReader) TerminalTexts() ([]string, error) {
-	if r.termNum == terminalNumMin {
-		return nil, fmt.Errorf("symbol table has no terminals")
-	}
-	return r.termTexts, nil
-}
-
-// NonTerminalSymbols returns every registered non-terminal, the start symbol
-// included, sorted by numeric value.
-func (r *SymbolTableReader) NonTerminalSymbols() []Symbol {
-	syms := make([]Symbol, 0, r.nonTermNum.Int()-nonTerminalNumMin.Int())
-	for sym := range r.sym2Text {
-		if !sym.isNonTerminal() || sym.IsNil() {
-			continue
-		}
-		syms = append(syms, sym)
-	}
-	sort.Slice(syms, func(i, j int) bool {
-		return syms[i] < syms[j]
-	})
-	return syms
-}
-
-// NonTerminalTexts returns the non-terminal texts indexed by symbol number.
-// The table's own slice is returned, not a copy. It fails when no non-terminal
-// has been registered or the start symbol has no text.
-func (r *SymbolTableReader) NonTerminalTexts() ([]string, error) {
-	if r.nonTermNum == nonTerminalNumMin || r.nonTermTexts[symbolStart.Num().Int()] == "" {
-		return nil, fmt.Errorf("symbol table has no non-terminals or no start symbol")
-	}
-	return r.nonTermTexts, nil
-}
diff --git a/grammar/symbol/symbol_test.go b/grammar/symbol/symbol_test.go
deleted file mode 100644
index 31c3edd..0000000
--- a/grammar/symbol/symbol_test.go
+++ /dev/null
@@ -1,159 +0,0 @@
-package symbol
-
-import "testing"
-
-// TestSymbol registers a small expression grammar and checks the properties,
-// the text round-trip, and the per-kind text slices of the resulting symbols.
-func TestSymbol(t *testing.T) {
-	tab := NewSymbolTable()
-	w := tab.Writer()
-	_, _ = w.RegisterStartSymbol("expr'")
-	_, _ = w.RegisterNonTerminalSymbol("expr")
-	_, _ = w.RegisterNonTerminalSymbol("term")
-	_, _ = w.RegisterNonTerminalSymbol("factor")
-	_, _ = w.RegisterTerminalSymbol("id")
-	_, _ = w.RegisterTerminalSymbol("add")
-	_, _ = w.RegisterTerminalSymbol("mul")
-	_, _ = w.RegisterTerminalSymbol("l_paren")
-	_, _ = w.RegisterTerminalSymbol("r_paren")
-
-	nonTermTexts := []string{
-		"", // Nil
-		"expr'",
-		"expr",
-		"term",
-		"factor",
-	}
-
-	termTexts := []string{
-		"",            // Nil
-		symbolNameEOF, // EOF
-		"id",
-		"add",
-		"mul",
-		"l_paren",
-		"r_paren",
-	}
-
-	tests := []struct {
-		text          string
-		isNil         bool
-		isStart       bool
-		isEOF         bool
-		isNonTerminal bool
-		isTerminal    bool
-	}{
-		{
-			text:          "expr'",
-			isStart:       true,
-			isNonTerminal: true,
-		},
-		{
-			text:          "expr",
-			isNonTerminal: true,
-		},
-		{
-			text:          "term",
-			isNonTerminal: true,
-		},
-		{
-			text:          "factor",
-			isNonTerminal: true,
-		},
-		{
-			text:       "id",
-			isTerminal: true,
-		},
-		{
-			text:       "add",
-			isTerminal: true,
-		},
-		{
-			text:       "mul",
-			isTerminal: true,
-		},
-		{
-			text:       "l_paren",
-			isTerminal: true,
-		},
-		{
-			text:       "r_paren",
-			isTerminal: true,
-		},
-	}
-	for _, tt := range tests {
-		t.Run(tt.text, func(t *testing.T) {
-			r := tab.Reader()
-			sym, ok := r.ToSymbol(tt.text)
-			if !ok {
-				t.Fatalf("symbol was not found")
-			}
-			testSymbolProperty(t, sym, tt.isNil, tt.isStart, tt.isEOF, tt.isNonTerminal, tt.isTerminal)
-			text, ok := r.ToText(sym)
-			if !ok {
-				t.Fatalf("text was not found")
-			}
-			if text != tt.text {
-				t.Fatalf("unexpected text representation; want: %v, got: %v", tt.text, text)
-			}
-		})
-	}
-
-	t.Run("EOF", func(t *testing.T) {
-		testSymbolProperty(t, SymbolEOF, false, false, true, false, true)
-	})
-
-	t.Run("Nil", func(t *testing.T) {
-		testSymbolProperty(t, SymbolNil, true, false, false, false, false)
-	})
-
-	t.Run("texts of non-terminals", func(t *testing.T) {
-		r := tab.Reader()
-		ts, err := r.NonTerminalTexts()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if len(ts) != len(nonTermTexts) {
-			t.Fatalf("unexpected non-terminal count; want: %v (%#v), got: %v (%#v)", len(nonTermTexts), nonTermTexts, len(ts), ts)
-		}
-		for i, text := range ts {
-			if text != nonTermTexts[i] {
-				t.Fatalf("unexpected non-terminal; want: %v, got: %v", nonTermTexts[i], text)
-			}
-		}
-	})
-
-	t.Run("texts of terminals", func(t *testing.T) {
-		r := tab.Reader()
-		ts, err := r.TerminalTexts()
-		if err != nil {
-			t.Fatal(err)
-		}
-		if len(ts) != len(termTexts) {
-			t.Fatalf("unexpected terminal count; want: %v (%#v), got: %v (%#v)", len(termTexts), termTexts, len(ts), ts)
-		}
-		for i, text := range ts {
-			if text != termTexts[i] {
-				t.Fatalf("unexpected terminal; want: %v, got: %v", termTexts[i], text)
-			}
-		}
-	})
-}
-
-// testSymbolProperty fails the test when any predicate of sym differs from the
-// expected value.
-func testSymbolProperty(t *testing.T, sym Symbol, isNil, isStart, isEOF, isNonTerminal, isTerminal bool) {
-	t.Helper()
-
-	if v := sym.IsNil(); v != isNil {
-		t.Fatalf("isNil property is mismatched; want: %v, got: %v", isNil, v)
-	}
-	if v := sym.IsStart(); v != isStart {
-		t.Fatalf("isStart property is mismatched; want: %v, got: %v", isStart, v)
-	}
-	if v := sym.isEOF(); v != isEOF {
-		t.Fatalf("isEOF property is mismatched; want: %v, got: %v", isEOF, v)
-	}
-	if v := sym.isNonTerminal(); v != isNonTerminal {
-		t.Fatalf("isNonTerminal property is mismatched; want: %v, got: %v", isNonTerminal, v)
-	}
-	if v := sym.IsTerminal(); v != isTerminal {
-		t.Fatalf("isTerminal property is mismatched; want: %v, got: %v", isTerminal, v)
-	}
-}
diff --git a/grammar/test_helper_test.go b/grammar/test_helper_test.go
deleted file mode 100644
index 63fcafb..0000000
--- a/grammar/test_helper_test.go
+++ /dev/null
@@ -1,68 +0,0 @@
-package grammar
-
-import (
-	"testing"
-
-	"grammar/symbol"
-)
-
-// testSymbolGenerator resolves a symbol text to its Symbol.
-type testSymbolGenerator func(text string) symbol.Symbol
-
-// newTestSymbolGenerator returns a generator that looks texts up in symTab and
-// fails the test when a text is not registered.
-func newTestSymbolGenerator(t *testing.T, symTab *symbol.SymbolTableReader) testSymbolGenerator {
-	return func(text string) symbol.Symbol {
-		t.Helper()
-
-		sym, ok := symTab.ToSymbol(text)
-		if !ok {
-			t.Fatalf("symbol was not found: %v", text)
-		}
-		return sym
-	}
-}
-
-// testProductionGenerator builds a production from symbol texts.
-type testProductionGenerator func(lhs string, rhs ...string) *production
-
-// newTestProductionGenerator returns a generator that resolves lhs and rhs
-// with genSym and fails the test when newProduction returns an error.
-func newTestProductionGenerator(t *testing.T, genSym testSymbolGenerator) testProductionGenerator {
-	return func(lhs string, rhs ...string) *production {
-		t.Helper()
-
-		rhsSym := []symbol.Symbol{}
-		for _, text := range rhs {
-			rhsSym = append(rhsSym, genSym(text))
-		}
-		prod, err := newProduction(genSym(lhs), rhsSym)
-		if err != nil {
-			t.Fatalf("failed to create a production: %v", err)
-		}
-
-		return prod
-	}
-}
-
-// testLR0ItemGenerator builds an LR(0) item from symbol texts and a dot position.
-type testLR0ItemGenerator func(lhs string, dot int, rhs ...string) *lrItem
-
-// newTestLR0ItemGenerator returns a generator that builds the production with
-// genProd and fails the test when newLR0Item returns an error.
-func newTestLR0ItemGenerator(t *testing.T, genProd testProductionGenerator) testLR0ItemGenerator {
-	return func(lhs string, dot int, rhs ...string) *lrItem {
-		t.Helper()
-
-		prod := genProd(lhs, rhs...)
-		item, err := newLR0Item(prod, dot)
-		if err != nil {
-			t.Fatalf("failed to create a LR0 item: %v", err)
-		}
-
-		return item
-	}
-}
-
-// withLookAhead adds the given symbols to item's look-ahead set, allocating
-// the set when it is nil, and returns the same item.
-func withLookAhead(item *lrItem, lookAhead ...symbol.Symbol) *lrItem {
-	if item.lookAhead.symbols == nil {
-		item.lookAhead.symbols = map[symbol.Symbol]struct{}{}
-	}
-
-	for _, a := range lookAhead {
-		item.lookAhead.symbols[a] = struct{}{}
-	}
-
-	return item
-}
|