diff options
Diffstat (limited to 'src/urubu/grammar/grammar.go')
-rw-r--r-- | src/urubu/grammar/grammar.go | 1390 |
1 files changed, 1390 insertions, 0 deletions
diff --git a/src/urubu/grammar/grammar.go b/src/urubu/grammar/grammar.go new file mode 100644 index 0000000..bfa53c6 --- /dev/null +++ b/src/urubu/grammar/grammar.go @@ -0,0 +1,1390 @@ +package grammar + +import ( + "fmt" + "io" + "strings" + + verr "urubu/error" + "urubu/grammar/lexical" + "urubu/grammar/symbol" + spec "urubu/spec/grammar" + "urubu/spec/grammar/parser" +) + +type astActionEntry struct { + position int + expansion bool +} + +type assocType string + +const ( + assocTypeNil = assocType("") + assocTypeLeft = assocType("left") + assocTypeRight = assocType("right") +) + +const ( + precNil = 0 + precMin = 1 +) + +// precAndAssoc represents precedence and associativities of terminal symbols and productions. +// We use the priority of the production to resolve shift/reduce conflicts. +type precAndAssoc struct { + // termPrec and termAssoc represent the precedence of the terminal symbols. + termPrec map[symbol.SymbolNum]int + termAssoc map[symbol.SymbolNum]assocType + + // prodPrec and prodAssoc represent the precedence and the associativities of the production. + // These values are inherited from the right-most terminal symbols in the RHS of the productions. + prodPrec map[productionNum]int + prodAssoc map[productionNum]assocType +} + +func (pa *precAndAssoc) terminalPrecedence(sym symbol.SymbolNum) int { + prec, ok := pa.termPrec[sym] + if !ok { + return precNil + } + + return prec +} + +func (pa *precAndAssoc) terminalAssociativity(sym symbol.SymbolNum) assocType { + assoc, ok := pa.termAssoc[sym] + if !ok { + return assocTypeNil + } + + return assoc +} + +func (pa *precAndAssoc) productionPredence(prod productionNum) int { + prec, ok := pa.prodPrec[prod] + if !ok { + return precNil + } + + return prec +} + +func (pa *precAndAssoc) productionAssociativity(prod productionNum) assocType { + assoc, ok := pa.prodAssoc[prod] + if !ok { + return assocTypeNil + } + + return assoc +} + +const reservedSymbolNameError = "error" + +type Grammar struct { + name string + lexSpec *lexical.LexSpec + skipSymbols []symbol.Symbol + productionSet *productionSet + augmentedStartSymbol symbol.Symbol + errorSymbol symbol.Symbol + symbolTable *symbol.SymbolTableReader + astActions map[productionID][]*astActionEntry + precAndAssoc *precAndAssoc + + // recoverProductions is a set of productions having the recover directive. + recoverProductions map[productionID]struct{} +} + +type buildConfig struct { + isReportingEnabled bool +} + +type BuildOption func(config *buildConfig) + +func EnableReporting() BuildOption { + return func(config *buildConfig) { + config.isReportingEnabled = true + } +} + +type GrammarBuilder struct { + AST *parser.RootNode + + errs verr.SpecErrors +} + +func (b *GrammarBuilder) Build(opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) { + gram, err := b.build() + if err != nil { + return nil, nil, err + } + + return compile(gram, opts...) +} + +func (b *GrammarBuilder) build() (*Grammar, error) { + var specName string + { + errOccurred := false + for _, dir := range b.AST.Directives { + if dir.Name != "name" { + continue + } + + if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'name' takes just one ID parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + + errOccurred = true + break + } + + specName = dir.Parameters[0].ID + break + } + + if specName == "" && !errOccurred { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrNoGrammarName, + }) + } + } + + b.checkSpellingInconsistenciesOfUserDefinedIDs(b.AST) + if len(b.errs) > 0 { + return nil, b.errs + } + + symTab, ss, err := b.genSymbolTable(b.AST) + if err != nil { + return nil, err + } + + lexSpec, skip, err := b.genLexSpecAndSkipSymbols(symTab.Reader(), b.AST) + if err != nil { + return nil, err + } + + prodsAndActs, err := b.genProductionsAndActions(b.AST, symTab.Reader(), ss.errSym, ss.augStartSym, ss.startSym) + if err != nil { + return nil, err + } + if prodsAndActs == nil && len(b.errs) > 0 { + return nil, b.errs + } + + pa, err := b.genPrecAndAssoc(symTab.Reader(), ss.errSym, prodsAndActs) + if err != nil { + return nil, err + } + if pa == nil && len(b.errs) > 0 { + return nil, b.errs + } + + syms := findUsedAndUnusedSymbols(b.AST) + if syms == nil && len(b.errs) > 0 { + return nil, b.errs + } + + // When a terminal symbol that cannot be reached from the start symbol has the skip directive, + // the compiler treats its terminal as a used symbol, not unused. + { + r := symTab.Reader() + for _, sym := range skip { + s, _ := r.ToText(sym) + if _, ok := syms.unusedTerminals[s]; !ok { + prod := syms.usedTerminals[s] + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrTermCannotBeSkipped, + Detail: s, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + continue + } + + delete(syms.unusedTerminals, s) + } + } + + for sym, prod := range syms.unusedProductions { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrUnusedProduction, + Detail: sym, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + + for sym, prod := range syms.unusedTerminals { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrUnusedTerminal, + Detail: sym, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + + if len(b.errs) > 0 { + return nil, b.errs + } + + return &Grammar{ + name: specName, + lexSpec: lexSpec, + skipSymbols: skip, + productionSet: prodsAndActs.prods, + augmentedStartSymbol: prodsAndActs.augStartSym, + errorSymbol: ss.errSym, + symbolTable: symTab.Reader(), + astActions: prodsAndActs.astActs, + recoverProductions: prodsAndActs.recoverProds, + precAndAssoc: pa, + }, nil +} + +type usedAndUnusedSymbols struct { + unusedProductions map[string]*parser.ProductionNode + unusedTerminals map[string]*parser.ProductionNode + usedTerminals map[string]*parser.ProductionNode +} + +func findUsedAndUnusedSymbols(root *parser.RootNode) *usedAndUnusedSymbols { + prods := map[string]*parser.ProductionNode{} + lexProds := map[string]*parser.ProductionNode{} + mark := map[string]bool{} + { + for _, p := range root.Productions { + prods[p.LHS] = p + mark[p.LHS] = false + for _, alt := range p.RHS { + for _, e := range alt.Elements { + if e.ID == "" { + continue + } + mark[e.ID] = false + } + } + } + + for _, p := range root.LexProductions { + lexProds[p.LHS] = p + mark[p.LHS] = false + } + + start := root.Productions[0] + mark[start.LHS] = true + markUsedSymbols(mark, map[string]bool{}, prods, start) + + // We don't have to check the error symbol because the error symbol doesn't have a production. + delete(mark, reservedSymbolNameError) + } + + usedTerms := make(map[string]*parser.ProductionNode, len(lexProds)) + unusedProds := map[string]*parser.ProductionNode{} + unusedTerms := map[string]*parser.ProductionNode{} + for sym, used := range mark { + if p, ok := prods[sym]; ok { + if used { + continue + } + unusedProds[sym] = p + continue + } + if p, ok := lexProds[sym]; ok { + if used { + usedTerms[sym] = p + } else { + unusedTerms[sym] = p + } + continue + } + + // May be reached here when a fragment name appears on the right-hand side of a production rule. However, an error + // to the effect that a production rule cannot contain a fragment will be detected in a subsequent process. So we can + // ignore it here. + } + + return &usedAndUnusedSymbols{ + usedTerminals: usedTerms, + unusedProductions: unusedProds, + unusedTerminals: unusedTerms, + } +} + +func markUsedSymbols(mark map[string]bool, marked map[string]bool, prods map[string]*parser.ProductionNode, prod *parser.ProductionNode) { + if marked[prod.LHS] { + return + } + + for _, alt := range prod.RHS { + for _, e := range alt.Elements { + if e.ID == "" { + continue + } + + mark[e.ID] = true + + p, ok := prods[e.ID] + if !ok { + continue + } + + // Remove a production to avoid inifinite recursion. + marked[prod.LHS] = true + + markUsedSymbols(mark, marked, prods, p) + } + } +} + +func (b *GrammarBuilder) checkSpellingInconsistenciesOfUserDefinedIDs(root *parser.RootNode) { + var ids []string + { + for _, prod := range root.Productions { + ids = append(ids, prod.LHS) + for _, alt := range prod.RHS { + for _, elem := range alt.Elements { + if elem.Label != nil { + ids = append(ids, elem.Label.Name) + } + } + } + } + for _, prod := range root.LexProductions { + ids = append(ids, prod.LHS) + } + for _, dir := range root.Directives { + dirIDs := collectUserDefinedIDsFromDirective(dir) + if len(dirIDs) > 0 { + ids = append(ids, dirIDs...) + } + } + } + + duplicated := lexical.FindSpellingInconsistencies(ids) + if len(duplicated) == 0 { + return + } + + for _, dup := range duplicated { + var s string + { + var b strings.Builder + fmt.Fprintf(&b, "%+v", dup[0]) + for _, id := range dup[1:] { + fmt.Fprintf(&b, ", %+v", id) + } + s = b.String() + } + + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrSpellingInconsistency, + Detail: s, + }) + } +} + +func collectUserDefinedIDsFromDirective(dir *parser.DirectiveNode) []string { + var ids []string + for _, param := range dir.Parameters { + if param.Group != nil { + for _, d := range param.Group { + dIDs := collectUserDefinedIDsFromDirective(d) + if len(dIDs) > 0 { + ids = append(ids, dIDs...) + } + } + } + if param.OrderedSymbol != "" { + ids = append(ids, param.OrderedSymbol) + } + } + return ids +} + +type symbols struct { + errSym symbol.Symbol + augStartSym symbol.Symbol + startSym symbol.Symbol +} + +func (b *GrammarBuilder) genSymbolTable(root *parser.RootNode) (*symbol.SymbolTable, *symbols, error) { + symTab := symbol.NewSymbolTable() + w := symTab.Writer() + r := symTab.Reader() + + // We need to register the reserved symbol before registering others. + var errSym symbol.Symbol + { + sym, err := w.RegisterTerminalSymbol(reservedSymbolNameError) + if err != nil { + return nil, nil, err + } + errSym = sym + } + + for _, prod := range root.LexProductions { + if sym, exist := r.ToSymbol(prod.LHS); exist { + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } else { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateTerminal, + Detail: prod.LHS, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + + continue + } + + _, err := w.RegisterTerminalSymbol(prod.LHS) + if err != nil { + return nil, nil, err + } + } + + startProd := root.Productions[0] + augStartText := fmt.Sprintf("%s'", startProd.LHS) + var err error + augStartSym, err := w.RegisterStartSymbol(augStartText) + if err != nil { + return nil, nil, err + } + if augStartSym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: startProd.Pos.Row, + Col: startProd.Pos.Col, + }) + } + + startSym, err := w.RegisterNonTerminalSymbol(startProd.LHS) + if err != nil { + return nil, nil, err + } + if startSym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: startProd.Pos.Row, + Col: startProd.Pos.Col, + }) + } + + for _, prod := range root.Productions { + sym, err := w.RegisterNonTerminalSymbol(prod.LHS) + if err != nil { + return nil, nil, err + } + if sym.IsTerminal() { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateName, + Detail: prod.LHS, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrErrSymIsReserved, + Row: prod.Pos.Row, + Col: prod.Pos.Col, + }) + } + } + + return symTab, &symbols{ + errSym: errSym, + augStartSym: augStartSym, + startSym: startSym, + }, nil +} + +func (b *GrammarBuilder) genLexSpecAndSkipSymbols(symTab *symbol.SymbolTableReader, root *parser.RootNode) (*lexical.LexSpec, []symbol.Symbol, error) { + entries := []*lexical.LexEntry{} + skipSyms := []symbol.Symbol{} + for _, prod := range root.LexProductions { + entry, skip, specErr, err := genLexEntry(prod) + if err != nil { + return nil, nil, err + } + if specErr != nil { + b.errs = append(b.errs, specErr) + continue + } + if skip { + sym, _ := symTab.ToSymbol(prod.LHS) + skipSyms = append(skipSyms, sym) + } + entries = append(entries, entry) + } + + checkedFragments := map[string]struct{}{} + for _, fragment := range root.Fragments { + if _, exist := checkedFragments[fragment.LHS]; exist { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateFragment, + Detail: fragment.LHS, + Row: fragment.Pos.Row, + Col: fragment.Pos.Col, + }) + continue + } + checkedFragments[fragment.LHS] = struct{}{} + + entries = append(entries, &lexical.LexEntry{ + Fragment: true, + Kind: spec.LexKindName(fragment.LHS), + Pattern: fragment.RHS, + }) + } + + return &lexical.LexSpec{ + Entries: entries, + }, skipSyms, nil +} + +func genLexEntry(prod *parser.ProductionNode) (*lexical.LexEntry, bool, *verr.SpecError, error) { + alt := prod.RHS[0] + elem := alt.Elements[0] + + var pattern string + if elem.Literally { + pattern = spec.EscapePattern(elem.Pattern) + } else { + pattern = elem.Pattern + } + + var modes []spec.LexModeName + var skip bool + var push spec.LexModeName + var pop bool + dirConsumed := map[string]struct{}{} + for _, dir := range prod.Directives { + if _, consumed := dirConsumed[dir.Name]; consumed { + return nil, false, &verr.SpecError{ + Cause: semErrDuplicateDir, + Detail: dir.Name, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + dirConsumed[dir.Name] = struct{}{} + + switch dir.Name { + case "mode": + if len(dir.Parameters) == 0 { + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'mode' directive needs an ID parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + for _, param := range dir.Parameters { + if param.ID == "" { + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'mode' directive needs an ID parameter", + Row: param.Pos.Row, + Col: param.Pos.Col, + }, nil + } + modes = append(modes, spec.LexModeName(param.ID)) + } + case "skip": + if len(dir.Parameters) > 0 { + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'skip' directive needs no parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + skip = true + case "push": + if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'push' directive needs an ID parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + push = spec.LexModeName(dir.Parameters[0].ID) + case "pop": + if len(dir.Parameters) > 0 { + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'pop' directive needs no parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + pop = true + default: + return nil, false, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: dir.Name, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + } + + if len(alt.Directives) > 0 { + return nil, false, &verr.SpecError{ + Cause: semErrInvalidAltDir, + Detail: "a lexical production cannot have alternative directives", + Row: alt.Directives[0].Pos.Row, + Col: alt.Directives[0].Pos.Col, + }, nil + } + + return &lexical.LexEntry{ + Modes: modes, + Kind: spec.LexKindName(prod.LHS), + Pattern: pattern, + Push: push, + Pop: pop, + }, skip, nil, nil +} + +type productionsAndActions struct { + prods *productionSet + augStartSym symbol.Symbol + astActs map[productionID][]*astActionEntry + prodPrecsTerm map[productionID]symbol.Symbol + prodPrecsOrdSym map[productionID]string + prodPrecPoss map[productionID]*parser.Position + recoverProds map[productionID]struct{} +} + +func (b *GrammarBuilder) genProductionsAndActions(root *parser.RootNode, symTab *symbol.SymbolTableReader, errSym symbol.Symbol, augStartSym symbol.Symbol, startSym symbol.Symbol) (*productionsAndActions, error) { + if len(root.Productions) == 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrNoProduction, + }) + return nil, nil + } + + prods := newProductionSet() + astActs := map[productionID][]*astActionEntry{} + prodPrecsTerm := map[productionID]symbol.Symbol{} + prodPrecsOrdSym := map[productionID]string{} + prodPrecPoss := map[productionID]*parser.Position{} + recoverProds := map[productionID]struct{}{} + + p, err := newProduction(augStartSym, []symbol.Symbol{ + startSym, + }) + if err != nil { + return nil, err + } + + prods.append(p) + + for _, prod := range root.Productions { + lhsSym, ok := symTab.ToSymbol(prod.LHS) + if !ok { + // All symbols are assumed to be pre-detected, so it's a bug if we cannot find them here. + return nil, fmt.Errorf("symbol '%v' is undefined", prod.LHS) + } + + if len(prod.Directives) > 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrInvalidProdDir, + Detail: "a production cannot have production directives", + Row: prod.Directives[0].Pos.Row, + Col: prod.Directives[0].Pos.Col, + }) + continue + } + + LOOP_RHS: + for _, alt := range prod.RHS { + altSyms := make([]symbol.Symbol, len(alt.Elements)) + offsets := map[string]int{} + ambiguousIDOffsets := map[string]struct{}{} + for i, elem := range alt.Elements { + sym, ok := symTab.ToSymbol(elem.ID) + if !ok { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrUndefinedSym, + Detail: elem.ID, + Row: elem.Pos.Row, + Col: elem.Pos.Col, + }) + continue LOOP_RHS + } + altSyms[i] = sym + + if elem.Label != nil { + if _, added := offsets[elem.Label.Name]; added { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateLabel, + Detail: elem.Label.Name, + Row: elem.Label.Pos.Row, + Col: elem.Label.Pos.Col, + }) + continue LOOP_RHS + } + if _, found := symTab.ToSymbol(elem.Label.Name); found { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrInvalidLabel, + Detail: elem.Label.Name, + Row: elem.Label.Pos.Row, + Col: elem.Label.Pos.Col, + }) + continue LOOP_RHS + } + offsets[elem.Label.Name] = i + } + // A symbol having a label can be specified by both the label and the symbol name. + // So record the symbol's position, whether or not it has a label. + if elem.ID != "" { + if _, exist := offsets[elem.ID]; exist { + // When the same symbol appears multiple times in an alternative, the symbol is ambiguous. When we need + // to specify the symbol in a directive, we cannot use the name of the ambiguous symbol. Instead, specify + // a label to resolve the ambiguity. + delete(offsets, elem.ID) + ambiguousIDOffsets[elem.ID] = struct{}{} + } else { + offsets[elem.ID] = i + } + } + } + + p, err := newProduction(lhsSym, altSyms) + if err != nil { + return nil, err + } + if _, exist := prods.findByID(p.id); exist { + // Report the line number of a duplicate alternative. + // When the alternative is empty, we report the position of its LHS. + var row int + var col int + if len(alt.Elements) > 0 { + row = alt.Elements[0].Pos.Row + col = alt.Elements[0].Pos.Col + } else { + row = prod.Pos.Row + col = prod.Pos.Col + } + + var detail string + { + var b strings.Builder + fmt.Fprintf(&b, "%v →", prod.LHS) + for _, elem := range alt.Elements { + switch { + case elem.ID != "": + fmt.Fprintf(&b, " %v", elem.ID) + case elem.Pattern != "": + fmt.Fprintf(&b, ` "%v"`, elem.Pattern) + } + } + if len(alt.Elements) == 0 { + fmt.Fprintf(&b, " ε") + } + + detail = b.String() + } + + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateProduction, + Detail: detail, + Row: row, + Col: col, + }) + continue LOOP_RHS + } + prods.append(p) + + dirConsumed := map[string]struct{}{} + for _, dir := range alt.Directives { + if _, consumed := dirConsumed[dir.Name]; consumed { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateDir, + Detail: dir.Name, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + } + dirConsumed[dir.Name] = struct{}{} + + switch dir.Name { + case "ast": + if len(dir.Parameters) == 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'ast' directive needs at least one parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + astAct := make([]*astActionEntry, len(dir.Parameters)) + consumedOffsets := map[int]struct{}{} + for i, param := range dir.Parameters { + if param.ID == "" { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'ast' directive can take only ID parameters", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + + if _, ambiguous := ambiguousIDOffsets[param.ID]; ambiguous { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrAmbiguousElem, + Detail: fmt.Sprintf("'%v' is ambiguous", param.ID), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + + offset, ok := offsets[param.ID] + if !ok { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("a symbol was not found in an alternative: %v", param.ID), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + if _, consumed := consumedOffsets[offset]; consumed { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateElem, + Detail: param.ID, + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + consumedOffsets[offset] = struct{}{} + + if param.Expansion { + elem := alt.Elements[offset] + if elem.Pattern != "" { + // Currently, it is a bug to reach here because it is + // forbidden to have anything other than ID appear in + // production rules. + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("the expansion symbol cannot be applied to a pattern (%v: \"%v\")", param.ID, elem.Pattern), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + elemSym, ok := symTab.ToSymbol(elem.ID) + if !ok { + // If the symbol was not found, it's a bug. + return nil, fmt.Errorf("a symbol corresponding to an ID (%v) was not found", elem.ID) + } + if elemSym.IsTerminal() { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("the expansion symbol cannot be applied to a terminal symbol (%v: %v)", param.ID, elem.ID), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + } + + astAct[i] = &astActionEntry{ + position: offset + 1, + expansion: param.Expansion, + } + } + astActs[p.id] = astAct + case "prec": + if len(dir.Parameters) != 1 || (dir.Parameters[0].ID == "" && dir.Parameters[0].OrderedSymbol == "") { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'prec' directive needs just one ID parameter or ordered symbol", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + param := dir.Parameters[0] + switch { + case param.ID != "": + sym, ok := symTab.ToSymbol(param.ID) + if !ok { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("unknown terminal symbol: %v", param.ID), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + } + if !sym.IsTerminal() { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("the symbol must be a terminal: %v", param.ID), + Row: param.Pos.Row, + Col: param.Pos.Col, + }) + continue LOOP_RHS + } + prodPrecsTerm[p.id] = sym + prodPrecPoss[p.id] = ¶m.Pos + case param.OrderedSymbol != "": + prodPrecsOrdSym[p.id] = param.OrderedSymbol + prodPrecPoss[p.id] = ¶m.Pos + } + case "recover": + if len(dir.Parameters) > 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'recover' directive needs no parameter", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + recoverProds[p.id] = struct{}{} + default: + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: fmt.Sprintf("invalid directive name '%v'", dir.Name), + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue LOOP_RHS + } + } + } + } + + return &productionsAndActions{ + prods: prods, + augStartSym: augStartSym, + astActs: astActs, + prodPrecsTerm: prodPrecsTerm, + prodPrecsOrdSym: prodPrecsOrdSym, + prodPrecPoss: prodPrecPoss, + recoverProds: recoverProds, + }, nil +} + +func (b *GrammarBuilder) genPrecAndAssoc(symTab *symbol.SymbolTableReader, errSym symbol.Symbol, prodsAndActs *productionsAndActions) (*precAndAssoc, error) { + termPrec := map[symbol.SymbolNum]int{} + termAssoc := map[symbol.SymbolNum]assocType{} + ordSymPrec := map[string]int{} + { + var precGroup []*parser.DirectiveNode + for _, dir := range b.AST.Directives { + if dir.Name == "prec" { + if dir.Parameters == nil || len(dir.Parameters) != 1 || dir.Parameters[0].Group == nil { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "'prec' needs just one directive group", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue + } + precGroup = dir.Parameters[0].Group + continue + } + + if dir.Name != "name" && dir.Name != "prec" { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: dir.Name, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + continue + } + } + + precN := precMin + for _, dir := range precGroup { + var assocTy assocType + switch dir.Name { + case "left": + assocTy = assocTypeLeft + case "right": + assocTy = assocTypeRight + case "assign": + assocTy = assocTypeNil + default: + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidName, + Detail: dir.Name, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + return nil, nil + } + + if len(dir.Parameters) == 0 { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "associativity needs at least one symbol", + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }) + return nil, nil + } + ASSOC_PARAM_LOOP: + for _, p := range dir.Parameters { + switch { + case p.ID != "": + sym, ok := symTab.ToSymbol(p.ID) + if !ok { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'%v' is undefined", p.ID), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + return nil, nil + } + if sym == errSym { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'%v' directive cannot be applied to an error symbol", dir.Name), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + return nil, nil + } + if !sym.IsTerminal() { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("associativity can take only terminal symbol ('%v' is a non-terminal)", p.ID), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + return nil, nil + } + if prec, alreadySet := termPrec[sym.Num()]; alreadySet { + if prec == precN { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateAssoc, + Detail: fmt.Sprintf("'%v' already has the same associativity and precedence", p.ID), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + } else if assoc := termAssoc[sym.Num()]; assoc == assocTy { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateAssoc, + Detail: fmt.Sprintf("'%v' already has different precedence", p.ID), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + } else { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateAssoc, + Detail: fmt.Sprintf("'%v' already has different associativity and precedence", p.ID), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + } + break ASSOC_PARAM_LOOP + } + + termPrec[sym.Num()] = precN + termAssoc[sym.Num()] = assocTy + case p.OrderedSymbol != "": + if prec, alreadySet := ordSymPrec[p.OrderedSymbol]; alreadySet { + if prec == precN { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateAssoc, + Detail: fmt.Sprintf("'$%v' already has the same precedence", p.OrderedSymbol), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + } else { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateAssoc, + Detail: fmt.Sprintf("'$%v' already has different precedence", p.OrderedSymbol), + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + } + break ASSOC_PARAM_LOOP + } + + ordSymPrec[p.OrderedSymbol] = precN + default: + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: "a parameter must be an ID or an ordered symbol", + Row: p.Pos.Row, + Col: p.Pos.Col, + }) + return nil, nil + } + } + + precN++ + } + } + if len(b.errs) > 0 { + return nil, nil + } + + prodPrec := map[productionNum]int{} + prodAssoc := map[productionNum]assocType{} + for _, prod := range prodsAndActs.prods.getAllProductions() { + // A #prec directive changes only precedence, not associativity. + if term, ok := prodsAndActs.prodPrecsTerm[prod.id]; ok { + if prec, ok := termPrec[term.Num()]; ok { + prodPrec[prod.num] = prec + prodAssoc[prod.num] = assocTypeNil + } else { + text, _ := symTab.ToText(term) + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrUndefinedPrec, + Detail: text, + Row: prodsAndActs.prodPrecPoss[prod.id].Row, + Col: prodsAndActs.prodPrecPoss[prod.id].Col, + }) + } + } else if ordSym, ok := prodsAndActs.prodPrecsOrdSym[prod.id]; ok { + if prec, ok := ordSymPrec[ordSym]; ok { + prodPrec[prod.num] = prec + prodAssoc[prod.num] = assocTypeNil + } else { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrUndefinedOrdSym, + Detail: fmt.Sprintf("$%v", ordSym), + Row: prodsAndActs.prodPrecPoss[prod.id].Row, + Col: prodsAndActs.prodPrecPoss[prod.id].Col, + }) + } + } else { + // A production inherits precedence and associativity from the right-most terminal symbol. + mostrightTerm := symbol.SymbolNil + for _, sym := range prod.rhs { + if !sym.IsTerminal() { + continue + } + mostrightTerm = sym + } + if !mostrightTerm.IsNil() { + prodPrec[prod.num] = termPrec[mostrightTerm.Num()] + prodAssoc[prod.num] = termAssoc[mostrightTerm.Num()] + } + } + } + if len(b.errs) > 0 { + return nil, nil + } + + return &precAndAssoc{ + termPrec: termPrec, + termAssoc: termAssoc, + prodPrec: prodPrec, + prodAssoc: prodAssoc, + }, nil +} + +func compile(gram *Grammar, opts ...BuildOption) (*spec.CompiledGrammar, *spec.Report, error) { + config := &buildConfig{} + for _, opt := range opts { + opt(config) + } + + lexSpec, err, cErrs := lexical.Compile(gram.lexSpec, lexical.CompressionLevelMax) + if err != nil { + if len(cErrs) > 0 { + var b strings.Builder + writeCompileError(&b, cErrs[0]) + for _, cerr := range cErrs[1:] { + fmt.Fprintf(&b, "\n") + writeCompileError(&b, cerr) + } + return nil, nil, fmt.Errorf(b.String()) + } + return nil, nil, err + } + + kind2Term := make([]int, len(lexSpec.KindNames)) + for i, k := range lexSpec.KindNames { + if k == spec.LexKindNameNil { + kind2Term[spec.LexKindIDNil] = symbol.SymbolNil.Num().Int() + continue + } + + sym, ok := gram.symbolTable.ToSymbol(k.String()) + if !ok { + return nil, nil, fmt.Errorf("terminal symbol '%v' was not found in a symbol table", k) + } + kind2Term[i] = sym.Num().Int() + } + + termTexts, err := gram.symbolTable.TerminalTexts() + if err != nil { + return nil, nil, err + } + + var termSkip []int + { + r := gram.symbolTable.Reader() + // I want to use gram.symbolTable.terminalSymbols() here instead of gram.symbolTable.terminalTexts(), + // but gram.symbolTable.terminalSymbols() is different in length from terminalTexts + // because it does not contain a predefined symbol, like EOF. + // Therefore, we use terminalTexts, although it takes more time to lookup for symbols. + termSkip = make([]int, len(termTexts)) + for _, t := range termTexts { + s, _ := r.ToSymbol(t) + for _, sk := range gram.skipSymbols { + if s != sk { + continue + } + termSkip[s.Num()] = 1 + break + } + } + } + + nonTerms, err := gram.symbolTable.NonTerminalTexts() + if err != nil { + return nil, nil, err + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + return nil, nil, err + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol, gram.errorSymbol) + if err != nil { + return nil, nil, err + } + + var tab *ParsingTable + var report *spec.Report + { + lalr1, err := genLALR1Automaton(lr0, gram.productionSet, firstSet) + if err != nil { + return nil, nil, err + } + + b := &lrTableBuilder{ + automaton: lalr1.lr0Automaton, + prods: gram.productionSet, + termCount: len(termTexts), + nonTermCount: len(nonTerms), + symTab: gram.symbolTable, + precAndAssoc: gram.precAndAssoc, + } + tab, err = b.build() + if err != nil { + return nil, nil, err + } + + if config.isReportingEnabled { + report, err = b.genReport(tab, gram) + if err != nil { + return nil, nil, err + } + } + } + + action := make([]int, len(tab.actionTable)) + for i, e := range tab.actionTable { + action[i] = int(e) + } + goTo := make([]int, len(tab.goToTable)) + for i, e := range tab.goToTable { + goTo[i] = int(e) + } + + lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1) + altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1) + recoverProds := make([]int, len(gram.productionSet.getAllProductions())+1) + astActEnties := make([][]int, len(gram.productionSet.getAllProductions())+1) + for _, p := range gram.productionSet.getAllProductions() { + lhsSyms[p.num] = p.lhs.Num().Int() + altSymCounts[p.num] = p.rhsLen + + if _, ok := gram.recoverProductions[p.id]; ok { + recoverProds[p.num] = 1 + } + + astAct, ok := gram.astActions[p.id] + if !ok { + continue + } + astActEntry := make([]int, len(astAct)) + for i, e := range astAct { + if e.expansion { + astActEntry[i] = e.position * -1 + } else { + astActEntry[i] = e.position + } + } + astActEnties[p.num] = astActEntry + } + + return &spec.CompiledGrammar{ + Name: gram.name, + Lexical: lexSpec, + Syntactic: &spec.SyntacticSpec{ + Action: action, + GoTo: goTo, + StateCount: tab.stateCount, + InitialState: tab.InitialState.Int(), + StartProduction: productionNumStart.Int(), + LHSSymbols: lhsSyms, + AlternativeSymbolCounts: altSymCounts, + Terminals: termTexts, + TerminalCount: tab.terminalCount, + TerminalSkip: termSkip, + KindToTerminal: kind2Term, + NonTerminals: nonTerms, + NonTerminalCount: tab.nonTerminalCount, + EOFSymbol: symbol.SymbolEOF.Num().Int(), + ErrorSymbol: gram.errorSymbol.Num().Int(), + ErrorTrapperStates: tab.errorTrapperStates, + RecoverProductions: recoverProds, + }, + ASTAction: &spec.ASTAction{ + Entries: astActEnties, + }, + }, report, nil +} + +func writeCompileError(w io.Writer, cErr *lexical.CompileError) { + if cErr.Fragment { + fmt.Fprintf(w, "fragment ") + } + fmt.Fprintf(w, "%v: %v", cErr.Kind, cErr.Cause) + if cErr.Detail != "" { + fmt.Fprintf(w, ": %v", cErr.Detail) + } +} |