diff options
Diffstat (limited to 'src/urubu/grammar/grammar.go')
-rw-r--r-- | src/urubu/grammar/grammar.go | 1390 |
1 files changed, 0 insertions, 1390 deletions
diff --git a/src/urubu/grammar/grammar.go b/src/urubu/grammar/grammar.go deleted file mode 100644 index bfa53c6..0000000 --- a/src/urubu/grammar/grammar.go +++ /dev/null @@ -1,1390 +0,0 @@ -package grammar - -import ( - "fmt" - "io" - "strings" - - verr "urubu/error" - "urubu/grammar/lexical" - "urubu/grammar/symbol" - spec "urubu/spec/grammar" - "urubu/spec/grammar/parser" -) - -type astActionEntry struct { - position int - expansion bool -} - -type assocType string - -const ( - assocTypeNil = assocType("") - assocTypeLeft = assocType("left") - assocTypeRight = assocType("right") -) - -const ( - precNil = 0 - precMin = 1 -) - -// precAndAssoc represents precedence and associativities of terminal symbols and productions. -// We use the priority of the production to resolve shift/reduce conflicts. -type precAndAssoc struct { - // termPrec and termAssoc represent the precedence of the terminal symbols. - termPrec map[symbol.SymbolNum]int - termAssoc map[symbol.SymbolNum]assocType - - // prodPrec and prodAssoc represent the precedence and the associativities of the production. - // These values are inherited from the right-most terminal symbols in the RHS of the productions. - prodPrec map[productionNum]int - prodAssoc map[productionNum]assocType -} - -func (pa *precAndAssoc) terminalPrecedence(sym symbol.SymbolNum) int { - prec, ok := pa.termPrec[sym] - if !ok { - return precNil - } - - return prec -} - -func (pa *precAndAssoc) terminalAssociativity(sym symbol.SymbolNum) assocType { - assoc, ok := pa.termAssoc[sym] - if !ok { - return assocTypeNil - } - - return assoc -} - -func (pa *precAndAssoc) productionPredence(prod productionNum) int { - prec, ok := pa.prodPrec[prod] - if !ok { - return precNil - } - - return prec -} - -func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType { - assoc, ok := pa.prodAssoc[prod] - if !ok { - return assocTypeNil - } - - return assoc -} - -const reservedSymbolNameError = "error" - -type Grammar struct { - name string - lexSpec *lexical.LexSpec - skipSymbols []symbol.Symbol - productionSet *productionSet - augmentedStartSymbol symbol.Symbol - errorSymbol symbol.Symbol - symbolTable *symbol.SymbolTableReader - astActions map[productionID][]*astActionEntry - precAndAssoc *precAndAssoc - - // recoverProductions is a set of productions having the recover directive. - recoverProductions map[productionID]struct{} -} - -type buildConfig struct { - isReportingEnabled bool -} - -type BuildOption func(config *buildConfig) - -func EnableReporting() BuildOption { - return func(config *buildConfig) { - config.isReportingEnabled = true - } -} - -type GrammarBuilder struct { - AST *parser.RootNode - - errs verr.SpecErrors -} - -func (b *GrammarBuilder) Build(opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) { - gram, err := b.build() - if err != nil { - return nil, nil, err - } - - return compile(gram, opts...) -} - -func (b *GrammarBuilder) build() (*Grammar, error) { - var specName string - { - errOccurred := false - for _, dir := range b.AST.Directives { - if dir.Name != "name" { - continue - } - - if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'name' takes just one ID parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - - errOccurred = true - break - } - - specName = dir.Parameters[0].ID - break - } - - if specName == "" && !errOccurred { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrNoGrammarName, - }) - } - } - - b.checkSpellingInconsistenciesOfUserDefinedIDs(b.AST) - if len(b.errs) > 0 { - return nil, b.errs - } - - symTab, ss, err := b.genSymbolTable(b.AST) - if err != nil { - return nil, err - } - - lexSpec, skip, err := b.genLexSpecAndSkipSymbols(symTab.Reader(), b.AST) - if err != nil { - return nil, err - } - - prodsAndActs, err := b.genProductionsAndActions(b.AST, symTab.Reader(), ss.errSym, ss.augStartSym, ss.startSym) - if err != nil { - return nil, err - } - if prodsAndActs == nil && len(b.errs) > 0 { - return nil, b.errs - } - - pa, err := b.genPrecAndAssoc(symTab.Reader(), ss.errSym, prodsAndActs) - if err != nil { - return nil, err - } - if pa == nil && len(b.errs) > 0 { - return nil, b.errs - } - - syms := findUsedAndUnusedSymbols(b.AST) - if syms == nil && len(b.errs) > 0 { - return nil, b.errs - } - - // When a terminal symbol that cannot be reached from the start symbol has the skip directive, - // the compiler treats its terminal as a used symbol, not unused. - { - r := symTab.Reader() - for _, sym := range skip { - s, _ := r.ToText(sym) - if _, ok := syms.unusedTerminals[s]; !ok { - prod := syms.usedTerminals[s] - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrTermCannotBeSkipped, - Detail: s, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - continue - } - - delete(syms.unusedTerminals, s) - } - } - - for sym, prod := range syms.unusedProductions { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUnusedProduction, - Detail: sym, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - - for sym, prod := range syms.unusedTerminals { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUnusedTerminal, - Detail: sym, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - - if len(b.errs) > 0 { - return nil, b.errs - } - - return &Grammar{ - name: specName, - lexSpec: lexSpec, - skipSymbols: skip, - productionSet: prodsAndActs.prods, - augmentedStartSymbol: prodsAndActs.augStartSym, - errorSymbol: ss.errSym, - symbolTable: symTab.Reader(), - astActions: prodsAndActs.astActs, - recoverProductions: prodsAndActs.recoverProds, - precAndAssoc: pa, - }, nil -} - -type usedAndUnusedSymbols struct { - unusedProductions map[string]*parser.ProductionNode - unusedTerminals map[string]*parser.ProductionNode - usedTerminals map[string]*parser.ProductionNode -} - -func findUsedAndUnusedSymbols(root *parser.RootNode) *usedAndUnusedSymbols { - prods := map[string]*parser.ProductionNode{} - lexProds := map[string]*parser.ProductionNode{} - mark := map[string]bool{} - { - for _, p := range root.Productions { - prods[p.LHS] = p - mark[p.LHS] = false - for _, alt := range p.RHS { - for _, e := range alt.Elements { - if e.ID == "" { - continue - } - mark[e.ID] = false - } - } - } - - for _, p := range root.LexProductions { - lexProds[p.LHS] = p - mark[p.LHS] = false - } - - start := root.Productions[0] - mark[start.LHS] = true - markUsedSymbols(mark, map[string]bool{}, prods, start) - - // We don't have to check the error symbol because the error symbol doesn't have a production. - delete(mark, reservedSymbolNameError) - } - - usedTerms := make(map[string]*parser.ProductionNode, len(lexProds)) - unusedProds := map[string]*parser.ProductionNode{} - unusedTerms := map[string]*parser.ProductionNode{} - for sym, used := range mark { - if p, ok := prods[sym]; ok { - if used { - continue - } - unusedProds[sym] = p - continue - } - if p, ok := lexProds[sym]; ok { - if used { - usedTerms[sym] = p - } else { - unusedTerms[sym] = p - } - continue - } - - // May be reached here when a fragment name appears on the right-hand side of a production rule. However, an error - // to the effect that a production rule cannot contain a fragment will be detected in a subsequent process. So we can - // ignore it here. - } - - return &usedAndUnusedSymbols{ - usedTerminals: usedTerms, - unusedProductions: unusedProds, - unusedTerminals: unusedTerms, - } -} - -func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[string]*parser.ProductionNode, prod *parser.ProductionNode) { - if marked[prod.LHS] { - return - } - - for _, alt := range prod.RHS { - for _, e := range alt.Elements { - if e.ID == "" { - continue - } - - mark[e.ID] = true - - p, ok := prods[e.ID] - if !ok { - continue - } - - // Remove a production to avoid inifinite recursion. - marked[prod.LHS] = true - - markUsedSymbols(mark, marked, prods, p) - } - } -} - -func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *parser.RootNode) { - var ids []string - { - for _, prod := range root.Productions { - ids = append(ids, prod.LHS) - for _, alt := range prod.RHS { - for _, elem := range alt.Elements { - if elem.Label != nil { - ids = append(ids, elem.Label.Name) - } - } - } - } - for _, prod := range root.LexProductions { - ids = append(ids, prod.LHS) - } - for _, dir := range root.Directives { - dirIDs := collectUserDefinedIDsFromDirective(dir) - if len(dirIDs) > 0 { - ids = append(ids, dirIDs...) - } - } - } - - duplicated := lexical.FindSpellingInconsistencies(ids) - if len(duplicated) == 0 { - return - } - - for _, dup := range duplicated { - var s string - { - var b strings.Builder - fmt.Fprintf(&b, "%+v", dup[0]) - for _, id := range dup[1:] { - fmt.Fprintf(&b, ", %+v", id) - } - s = b.String() - } - - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrSpellingInconsistency, - Detail: s, - }) - } -} - -func collectUserDefinedIDsFromDirective(dir *parser.DirectiveNode) []string { - var ids []string - for _, param := range dir.Parameters { - if param.Group != nil { - for _, d := range param.Group { - dIDs := collectUserDefinedIDsFromDirective(d) - if len(dIDs) > 0 { - ids = append(ids, dIDs...) - } - } - } - if param.OrderedSymbol != "" { - ids = append(ids, param.OrderedSymbol) - } - } - return ids -} - -type symbols struct { - errSym symbol.Symbol - augStartSym symbol.Symbol - startSym symbol.Symbol -} - -func (b *GrammarBuilder) genSymbolTable(root *parser.RootNode) (*symbol.SymbolTable, *symbols, error) { - symTab := symbol.NewSymbolTable() - w := symTab.Writer() - r := symTab.Reader() - - // We need to register the reserved symbol before registering others. - var errSym symbol.Symbol - { - sym, err := w.RegisterTerminalSymbol(reservedSymbolNameError) - if err != nil { - return nil, nil, err - } - errSym = sym - } - - for _, prod := range root.LexProductions { - if sym, exist := r.ToSymbol(prod.LHS); exist { - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } else { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateTerminal, - Detail: prod.LHS, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - - continue - } - - _, err := w.RegisterTerminalSymbol(prod.LHS) - if err != nil { - return nil, nil, err - } - } - - startProd := root.Productions[0] - augStartText := fmt.Sprintf("%s'", startProd.LHS) - var err error - augStartSym, err := w.RegisterStartSymbol(augStartText) - if err != nil { - return nil, nil, err - } - if augStartSym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: startProd.Pos.Row, - Col: startProd.Pos.Col, - }) - } - - startSym, err := w.RegisterNonTerminalSymbol(startProd.LHS) - if err != nil { - return nil, nil, err - } - if startSym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: startProd.Pos.Row, - Col: startProd.Pos.Col, - }) - } - - for _, prod := range root.Productions { - sym, err := w.RegisterNonTerminalSymbol(prod.LHS) - if err != nil { - return nil, nil, err - } - if sym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateName, - Detail: prod.LHS, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrErrSymIsReserved, - Row: prod.Pos.Row, - Col: prod.Pos.Col, - }) - } - } - - return symTab, &symbols{ - errSym: errSym, - augStartSym: augStartSym, - startSym: startSym, - }, nil -} - -func (b *GrammarBuilder) genLexSpecAndSkipSymbols(symTab *symbol.SymbolTableReader, root *parser.RootNode) (*lexical.LexSpec, []symbol.Symbol, error) { - entries := []*lexical.LexEntry{} - skipSyms := []symbol.Symbol{} - for _, prod := range root.LexProductions { - entry, skip, specErr, err := genLexEntry(prod) - if err != nil { - return nil, nil, err - } - if specErr != nil { - b.errs = append(b.errs, specErr) - continue - } - if skip { - sym, _ := symTab.ToSymbol(prod.LHS) - skipSyms = append(skipSyms, sym) - } - entries = append(entries, entry) - } - - checkedFragments := map[string]struct{}{} - for _, fragment := range root.Fragments { - if _, exist := checkedFragments[fragment.LHS]; exist { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateFragment, - Detail: fragment.LHS, - Row: fragment.Pos.Row, - Col: fragment.Pos.Col, - }) - continue - } - checkedFragments[fragment.LHS] = struct{}{} - - entries = append(entries, &lexical.LexEntry{ - Fragment: true, - Kind: spec.LexKindName(fragment.LHS), - Pattern: fragment.RHS, - }) - } - - return &lexical.LexSpec{ - Entries: entries, - }, skipSyms, nil -} - -func genLexEntry(prod *parser.ProductionNode) (*lexical.LexEntry, bool, *verr.SpecError, error) { - alt := prod.RHS[0] - elem := alt.Elements[0] - - var pattern string - if elem.Literally { - pattern = spec.EscapePattern(elem.Pattern) - } else { - pattern = elem.Pattern - } - - var modes []spec.LexModeName - var skip bool - var push spec.LexModeName - var pop bool - dirConsumed := map[string]struct{}{} - for _, dir := range prod.Directives { - if _, consumed := dirConsumed[dir.Name]; consumed { - return nil, false, &verr.SpecError{ - Cause: semErrDuplicateDir, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - dirConsumed[dir.Name] = struct{}{} - - switch dir.Name { - case "mode": - if len(dir.Parameters) == 0 { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'mode' directive needs an ID parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - for _, param := range dir.Parameters { - if param.ID == "" { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'mode' directive needs an ID parameter", - Row: param.Pos.Row, - Col: param.Pos.Col, - }, nil - } - modes = append(modes, spec.LexModeName(param.ID)) - } - case "skip": - if len(dir.Parameters) > 0 { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'skip' directive needs no parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - skip = true - case "push": - if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'push' directive needs an ID parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - push = spec.LexModeName(dir.Parameters[0].ID) - case "pop": - if len(dir.Parameters) > 0 { - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'pop' directive needs no parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - pop = true - default: - return nil, false, &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - } - - if len(alt.Directives) > 0 { - return nil, false, &verr.SpecError{ - Cause: semErrInvalidAltDir, - Detail: "a lexical production cannot have alternative directives", - Row: alt.Directives[0].Pos.Row, - Col: alt.Directives[0].Pos.Col, - }, nil - } - - return &lexical.LexEntry{ - Modes: modes, - Kind: spec.LexKindName(prod.LHS), - Pattern: pattern, - Push: push, - Pop: pop, - }, skip, nil, nil -} - -type productionsAndActions struct { - prods *productionSet - augStartSym symbol.Symbol - astActs map[productionID][]*astActionEntry - prodPrecsTerm map[productionID]symbol.Symbol - prodPrecsOrdSym map[productionID]string - prodPrecPoss map[productionID]*parser.Position - recoverProds map[productionID]struct{} -} - -func (b *GrammarBuilder) genProductionsAndActions(root *parser.RootNode, symTab *symbol.SymbolTableReader, errSym symbol.Symbol, augStartSym symbol.Symbol, startSym symbol.Symbol) (*productionsAndActions, error) { - if len(root.Productions) == 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrNoProduction, - }) - return nil, nil - } - - prods := newProductionSet() - astActs := map[productionID][]*astActionEntry{} - prodPrecsTerm := map[productionID]symbol.Symbol{} - prodPrecsOrdSym := map[productionID]string{} - prodPrecPoss := map[productionID]*parser.Position{} - recoverProds := map[productionID]struct{}{} - - p, err := newProduction(augStartSym, []symbol.Symbol{ - startSym, - }) - if err != nil { - return nil, err - } - - prods.append(p) - - for _, prod := range root.Productions { - lhsSym, ok := symTab.ToSymbol(prod.LHS) - if !ok { - // All symbols are assumed to be pre-detected, so it's a bug if we cannot find them here. - return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS) - } - - if len(prod.Directives) > 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrInvalidProdDir, - Detail: "a production cannot have production directives", - Row: prod.Directives[0].Pos.Row, - Col: prod.Directives[0].Pos.Col, - }) - continue - } - - LOOP_RHS: - for _, alt := range prod.RHS { - altSyms := make([]symbol.Symbol, len(alt.Elements)) - offsets := map[string]int{} - ambiguousIDOffsets := map[string]struct{}{} - for i, elem := range alt.Elements { - sym, ok := symTab.ToSymbol(elem.ID) - if !ok { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUndefinedSym, - Detail: elem.ID, - Row: elem.Pos.Row, - Col: elem.Pos.Col, - }) - continue LOOP_RHS - } - altSyms[i] = sym - - if elem.Label != nil { - if _, added := offsets[elem.Label.Name]; added { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateLabel, - Detail: elem.Label.Name, - Row: elem.Label.Pos.Row, - Col: elem.Label.Pos.Col, - }) - continue LOOP_RHS - } - if _, found := symTab.ToSymbol(elem.Label.Name); found { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrInvalidLabel, - Detail: elem.Label.Name, - Row: elem.Label.Pos.Row, - Col: elem.Label.Pos.Col, - }) - continue LOOP_RHS - } - offsets[elem.Label.Name] = i - } - // A symbol having a label can be specified by both the label and the symbol name. - // So record the symbol's position, whether or not it has a label. - if elem.ID != "" { - if _, exist := offsets[elem.ID]; exist { - // When the same symbol appears multiple times in an alternative, the symbol is ambiguous. When we need - // to specify the symbol in a directive, we cannot use the name of the ambiguous symbol. Instead, specify - // a label to resolve the ambiguity. - delete(offsets, elem.ID) - ambiguousIDOffsets[elem.ID] = struct{}{} - } else { - offsets[elem.ID] = i - } - } - } - - p, err := newProduction(lhsSym, altSyms) - if err != nil { - return nil, err - } - if _, exist := prods.findByID(p.id); exist { - // Report the line number of a duplicate alternative. - // When the alternative is empty, we report the position of its LHS. - var row int - var col int - if len(alt.Elements) > 0 { - row = alt.Elements[0].Pos.Row - col = alt.Elements[0].Pos.Col - } else { - row = prod.Pos.Row - col = prod.Pos.Col - } - - var detail string - { - var b strings.Builder - fmt.Fprintf(&b, "%v →", prod.LHS) - for _, elem := range alt.Elements { - switch { - case elem.ID != "": - fmt.Fprintf(&b, " %v", elem.ID) - case elem.Pattern != "": - fmt.Fprintf(&b, ` "%v"`, elem.Pattern) - } - } - if len(alt.Elements) == 0 { - fmt.Fprintf(&b, " ε") - } - - detail = b.String() - } - - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateProduction, - Detail: detail, - Row: row, - Col: col, - }) - continue LOOP_RHS - } - prods.append(p) - - dirConsumed := map[string]struct{}{} - for _, dir := range alt.Directives { - if _, consumed := dirConsumed[dir.Name]; consumed { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateDir, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - } - dirConsumed[dir.Name] = struct{}{} - - switch dir.Name { - case "ast": - if len(dir.Parameters) == 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'ast' directive needs at least one parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - astAct := make([]*astActionEntry, len(dir.Parameters)) - consumedOffsets := map[int]struct{}{} - for i, param := range dir.Parameters { - if param.ID == "" { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'ast' directive can take only ID parameters", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - - if _, ambiguous := ambiguousIDOffsets[param.ID]; ambiguous { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrAmbiguousElem, - Detail: fmt.Sprintf("'%v' is ambiguous", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - - offset, ok := offsets[param.ID] - if !ok { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("a symbol was not found in an alternative: %v", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - if _, consumed := consumedOffsets[offset]; consumed { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateElem, - Detail: param.ID, - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - consumedOffsets[offset] = struct{}{} - - if param.Expansion { - elem := alt.Elements[offset] - if elem.Pattern != "" { - // Currently, it is a bug to reach here because it is - // forbidden to have anything other than ID appear in - // production rules. - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("the expansion symbol cannot be applied to a pattern (%v: \"%v\")", param.ID, elem.Pattern), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - elemSym, ok := symTab.ToSymbol(elem.ID) - if !ok { - // If the symbol was not found, it's a bug. - return nil, fmt.Errorf("a symbol corresponding to an ID (%v) was not found", elem.ID) - } - if elemSym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("the expansion symbol cannot be applied to a terminal symbol (%v: %v)", param.ID, elem.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - } - - astAct[i] = &astActionEntry{ - position: offset + 1, - expansion: param.Expansion, - } - } - astActs[p.id] = astAct - case "prec": - if len(dir.Parameters) != 1 || (dir.Parameters[0].ID == "" && dir.Parameters[0].OrderedSymbol == "") { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'prec' directive needs just one ID parameter or ordered symbol", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - param := dir.Parameters[0] - switch { - case param.ID != "": - sym, ok := symTab.ToSymbol(param.ID) - if !ok { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("unknown terminal symbol: %v", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - } - if !sym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("the symbol must be a terminal: %v", param.ID), - Row: param.Pos.Row, - Col: param.Pos.Col, - }) - continue LOOP_RHS - } - prodPrecsTerm[p.id] = sym - prodPrecPoss[p.id] = ¶m.Pos - case param.OrderedSymbol != "": - prodPrecsOrdSym[p.id] = param.OrderedSymbol - prodPrecPoss[p.id] = ¶m.Pos - } - case "recover": - if len(dir.Parameters) > 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'recover' directive needs no parameter", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - recoverProds[p.id] = struct{}{} - default: - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: fmt.Sprintf("invalid directive name '%v'", dir.Name), - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue LOOP_RHS - } - } - } - } - - return &productionsAndActions{ - prods: prods, - augStartSym: augStartSym, - astActs: astActs, - prodPrecsTerm: prodPrecsTerm, - prodPrecsOrdSym: prodPrecsOrdSym, - prodPrecPoss: prodPrecPoss, - recoverProds: recoverProds, - }, nil -} - -func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbol.SymbolTableReader, errSym symbol.Symbol, prodsAndActs *productionsAndActions) (*precAndAssoc, error) { - termPrec := map[symbol.SymbolNum]int{} - termAssoc := map[symbol.SymbolNum]assocType{} - ordSymPrec := map[string]int{} - { - var precGroup []*parser.DirectiveNode - for _, dir := range b.AST.Directives { - if dir.Name == "prec" { - if dir.Parameters == nil || len(dir.Parameters) != 1 || dir.Parameters[0].Group == nil { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "'prec' needs just one directive group", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue - } - precGroup = dir.Parameters[0].Group - continue - } - - if dir.Name != "name" && dir.Name != "prec" { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - continue - } - } - - precN := precMin - for _, dir := range precGroup { - var assocTy assocType - switch dir.Name { - case "left": - assocTy = assocTypeLeft - case "right": - assocTy = assocTypeRight - case "assign": - assocTy = assocTypeNil - default: - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - return nil, nil - } - - if len(dir.Parameters) == 0 { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "associativity needs at least one symbol", - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }) - return nil, nil - } - ASSOC_PARAM_LOOP: - for _, p := range dir.Parameters { - switch { - case p.ID != "": - sym, ok := symTab.ToSymbol(p.ID) - if !ok { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("'%v' is undefined", p.ID), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - return nil, nil - } - if sym == errSym { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - return nil, nil - } - if !sym.IsTerminal() { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: fmt.Sprintf("associativity can take only terminal symbol ('%v' is a non-terminal)", p.ID), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - return nil, nil - } - if prec, alreadySet := termPrec[sym.Num()]; alreadySet { - if prec == precN { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateAssoc, - Detail: fmt.Sprintf("'%v' already has the same associativity and precedence", p.ID), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - } else if assoc := termAssoc[sym.Num()]; assoc == assocTy { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateAssoc, - Detail: fmt.Sprintf("'%v' already has different precedence", p.ID), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - } else { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateAssoc, - Detail: fmt.Sprintf("'%v' already has different associativity and precedence", p.ID), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - } - break ASSOC_PARAM_LOOP - } - - termPrec[sym.Num()] = precN - termAssoc[sym.Num()] = assocTy - case p.OrderedSymbol != "": - if prec, alreadySet := ordSymPrec[p.OrderedSymbol]; alreadySet { - if prec == precN { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateAssoc, - Detail: fmt.Sprintf("'$%v' already has the same precedence", p.OrderedSymbol), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - } else { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDuplicateAssoc, - Detail: fmt.Sprintf("'$%v' already has different precedence", p.OrderedSymbol), - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - } - break ASSOC_PARAM_LOOP - } - - ordSymPrec[p.OrderedSymbol] = precN - default: - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrDirInvalidParam, - Detail: "a parameter must be an ID or an ordered symbol", - Row: p.Pos.Row, - Col: p.Pos.Col, - }) - return nil, nil - } - } - - precN++ - } - } - if len(b.errs) > 0 { - return nil, nil - } - - prodPrec := map[productionNum]int{} - prodAssoc := map[productionNum]assocType{} - for _, prod := range prodsAndActs.prods.getAllProductions() { - // A #prec directive changes only precedence, not associativity. - if term, ok := prodsAndActs.prodPrecsTerm[prod.id]; ok { - if prec, ok := termPrec[term.Num()]; ok { - prodPrec[prod.num] = prec - prodAssoc[prod.num] = assocTypeNil - } else { - text, _ := symTab.ToText(term) - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUndefinedPrec, - Detail: text, - Row: prodsAndActs.prodPrecPoss[prod.id].Row, - Col: prodsAndActs.prodPrecPoss[prod.id].Col, - }) - } - } else if ordSym, ok := prodsAndActs.prodPrecsOrdSym[prod.id]; ok { - if prec, ok := ordSymPrec[ordSym]; ok { - prodPrec[prod.num] = prec - prodAssoc[prod.num] = assocTypeNil - } else { - b.errs = append(b.errs, &verr.SpecError{ - Cause: semErrUndefinedOrdSym, - Detail: fmt.Sprintf("$%v", ordSym), - Row: prodsAndActs.prodPrecPoss[prod.id].Row, - Col: prodsAndActs.prodPrecPoss[prod.id].Col, - }) - } - } else { - // A production inherits precedence and associativity from the right-most terminal symbol. - mostrightTerm := symbol.SymbolNil - for _, sym := range prod.rhs { - if !sym.IsTerminal() { - continue - } - mostrightTerm = sym - } - if !mostrightTerm.IsNil() { - prodPrec[prod.num] = termPrec[mostrightTerm.Num()] - prodAssoc[prod.num] = termAssoc[mostrightTerm.Num()] - } - } - } - if len(b.errs) > 0 { - return nil, nil - } - - return &precAndAssoc{ - termPrec: termPrec, - termAssoc: termAssoc, - prodPrec: prodPrec, - prodAssoc: prodAssoc, - }, nil -} - -func compile(gram *Grammar, opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) { - config := &buildConfig{} - for _, opt := range opts { - opt(config) - } - - lexSpec, err, cErrs := lexical.Compile(gram.lexSpec, lexical.CompressionLevelMax) - if err != nil { - if len(cErrs) > 0 { - var b strings.Builder - writeCompileError(&b, cErrs[0]) - for _, cerr := range cErrs[1:] { - fmt.Fprintf(&b, "\n") - writeCompileError(&b, cerr) - } - return nil, nil, fmt.Errorf(b.String()) - } - return nil, nil, err - } - - kind2Term := make([]int, len(lexSpec.KindNames)) - for i, k := range lexSpec.KindNames { - if k == spec.LexKindNameNil { - kind2Term[spec.LexKindIDNil] = symbol.SymbolNil.Num().Int() - continue - } - - sym, ok := gram.symbolTable.ToSymbol(k.String()) - if !ok { - return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k) - } - kind2Term[i] = sym.Num().Int() - } - - termTexts, err := gram.symbolTable.TerminalTexts() - if err != nil { - return nil, nil, err - } - - var termSkip []int - { - r := gram.symbolTable.Reader() - // I want to use gram.symbolTable.terminalSymbols() here instead of gram.symbolTable.terminalTexts(), - // but gram.symbolTable.terminalSymbols() is different in length from terminalTexts - // because it does not contain a predefined symbol, like EOF. - // Therefore, we use terminalTexts, although it takes more time to lookup for symbols. - termSkip = make([]int, len(termTexts)) - for _, t := range termTexts { - s, _ := r.ToSymbol(t) - for _, sk := range gram.skipSymbols { - if s != sk { - continue - } - termSkip[s.Num()] = 1 - break - } - } - } - - nonTerms, err := gram.symbolTable.NonTerminalTexts() - if err != nil { - return nil, nil, err - } - - firstSet, err := genFirstSet(gram.productionSet) - if err != nil { - return nil, nil, err - } - - lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) - if err != nil { - return nil, nil, err - } - - var tab *ParsingTable - var report *spec.Report - { - lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet) - if err != nil { - return nil, nil, err - } - - b := &lrTableBuilder{ - automaton: lalr1.lr0Automaton, - prods: gram.productionSet, - termCount: len(termTexts), - nonTermCount: len(nonTerms), - symTab: gram.symbolTable, - precAndAssoc: gram.precAndAssoc, - } - tab, err = b.build() - if err != nil { - return nil, nil, err - } - - if config.isReportingEnabled { - report, err = b.genReport(tab, gram) - if err != nil { - return nil, nil, err - } - } - } - - action := make([]int, len(tab.actionTable)) - for i, e := range tab.actionTable { - action[i] = int(e) - } - goTo := make([]int, len(tab.goToTable)) - for i, e := range tab.goToTable { - goTo[i] = int(e) - } - - lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1) - altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1) - recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1) - astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1) - for _, p := range gram.productionSet.getAllProductions() { - lhsSyms[p.num] = p.lhs.Num().Int() - altSymCounts[p.num] = p.rhsLen - - if _, ok := gram.recoverProductions[p.id]; ok { - recoverProds[p.num] = 1 - } - - astAct, ok := gram.astActions[p.id] - if !ok { - continue - } - astActEntry := make([]int, len(astAct)) - for i, e := range astAct { - if e.expansion { - astActEntry[i] = e.position * -1 - } else { - astActEntry[i] = e.position - } - } - astActEnties[p.num] = astActEntry - } - - return &spec.CompiledGrammar{ - Name: gram.name, - Lexical: lexSpec, - Syntactic: &spec.SyntacticSpec{ - Action: action, - GoTo: goTo, - StateCount: tab.stateCount, - InitialState: tab.InitialState.Int(), - StartProduction: productionNumStart.Int(), - LHSSymbols: lhsSyms, - AlternativeSymbolCounts: altSymCounts, - Terminals: termTexts, - TerminalCount: tab.terminalCount, - TerminalSkip: termSkip, - KindToTerminal: kind2Term, - NonTerminals: nonTerms, - NonTerminalCount: tab.nonTerminalCount, - EOFSymbol: symbol.SymbolEOF.Num().Int(), - ErrorSymbol: gram.errorSymbol.Num().Int(), - ErrorTrapperStates: tab.errorTrapperStates, - RecoverProductions: recoverProds, - }, - ASTAction: &spec.ASTAction{ - Entries: astActEnties, - }, - }, report, nil -} - -func writeCompileError(w io.Writer, cErr *lexical.CompileError) { - if cErr.Fragment { - fmt.Fprintf(w, "fragment ") - } - fmt.Fprintf(w, "%v: %v", cErr.Kind, cErr.Cause) - if cErr.Detail != "" { - fmt.Fprintf(w, ": %v", cErr.Detail) - } -} |