aboutsummaryrefslogtreecommitdiff
path: root/grammar/lr0_item.go
diff options
context:
space:
mode:
Diffstat (limited to 'grammar/lr0_item.go')
-rw-r--r--grammar/lr0_item.go343
1 files changed, 343 insertions, 0 deletions
diff --git a/grammar/lr0_item.go b/grammar/lr0_item.go
new file mode 100644
index 0000000..2934061
--- /dev/null
+++ b/grammar/lr0_item.go
@@ -0,0 +1,343 @@
+package grammar
+
+import (
+ "crypto/sha256"
+ "encoding/binary"
+ "fmt"
+ "sort"
+ "strconv"
+)
+
+type lr0ItemID [32]byte
+
+func (id lr0ItemID) String() string {
+ return fmt.Sprintf("%x", id.num())
+}
+
+func (id lr0ItemID) num() uint32 {
+ return binary.LittleEndian.Uint32(id[:])
+}
+
+type lr0Item struct {
+ id lr0ItemID
+ prod productionID
+
+ // E → E + T
+ //
+ // Dot | Dotted Symbol | Item
+ // ----+---------------+------------
+ // 0 | E | E →・E + T
+ // 1 | + | E → E・+ T
+ // 2 | T | E → E +・T
+ // 3 | Nil | E → E + T・
+ dot int
+ dottedSymbol symbol
+
+ // When initial is true, the LHS of the production is the augmented start symbol and dot is 0.
+ // It looks like S' →・S.
+ initial bool
+
+ // When reducible is true, the item looks like E → E + T・.
+ reducible bool
+
+ // When kernel is true, the item is kernel item.
+ kernel bool
+}
+
+func newLR0Item(prod *production, dot int) (*lr0Item, error) {
+ if prod == nil {
+ return nil, fmt.Errorf("production must be non-nil")
+ }
+
+ if dot < 0 || dot > prod.rhsLen {
+ return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen)
+ }
+
+ var id lr0ItemID
+ {
+ b := []byte{}
+ b = append(b, prod.id[:]...)
+ bDot := make([]byte, 8)
+ binary.LittleEndian.PutUint64(bDot, uint64(dot))
+ b = append(b, bDot...)
+ id = sha256.Sum256(b)
+ }
+
+ dottedSymbol := symbolNil
+ if dot < prod.rhsLen {
+ dottedSymbol = prod.rhs[dot]
+ }
+
+ initial := false
+ if prod.lhs.isStart() && dot == 0 {
+ initial = true
+ }
+
+ reducible := false
+ if dot == prod.rhsLen {
+ reducible = true
+ }
+
+ kernel := false
+ if initial || dot > 0 {
+ kernel = true
+ }
+
+ item := &lr0Item{
+ id: id,
+ prod: prod.id,
+ dot: dot,
+ dottedSymbol: dottedSymbol,
+ initial: initial,
+ reducible: reducible,
+ kernel: kernel,
+ }
+
+ return item, nil
+}
+
+type kernelID [32]byte
+
+func (id kernelID) String() string {
+ return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:]))
+}
+
+type kernel struct {
+ id kernelID
+ items []*lr0Item
+}
+
+func newKernel(items []*lr0Item) (*kernel, error) {
+ if len(items) == 0 {
+ return nil, fmt.Errorf("a kernel need at least one item")
+ }
+
+ // Remove duplicates from items.
+ var sortedItems []*lr0Item
+ {
+ m := map[lr0ItemID]*lr0Item{}
+ for _, item := range items {
+ if !item.kernel {
+ return nil, fmt.Errorf("not a kernel item: %v", item)
+ }
+ m[item.id] = item
+ }
+ sortedItems = []*lr0Item{}
+ for _, item := range m {
+ sortedItems = append(sortedItems, item)
+ }
+ sort.Slice(sortedItems, func(i, j int) bool {
+ return sortedItems[i].id.num() < sortedItems[j].id.num()
+ })
+ }
+
+ var id kernelID
+ {
+ b := []byte{}
+ for _, item := range sortedItems {
+ b = append(b, item.id[:]...)
+ }
+ id = sha256.Sum256(b)
+ }
+
+ return &kernel{
+ id: id,
+ items: sortedItems,
+ }, nil
+}
+
+type stateNum int
+
+const stateNumInitial = stateNum(0)
+
+func (n stateNum) Int() int {
+ return int(n)
+}
+
+func (n stateNum) String() string {
+ return strconv.Itoa(int(n))
+}
+
+func (n stateNum) next() stateNum {
+ return stateNum(n + 1)
+}
+
+type lr0State struct {
+ *kernel
+ num stateNum
+ next map[symbol]kernelID
+ reducible map[productionID]struct{}
+}
+
+type lr0Automaton struct {
+ initialState kernelID
+ states map[kernelID]*lr0State
+}
+
+func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) {
+ if !startSym.isStart() {
+ return nil, fmt.Errorf("passed symbold is not a start symbol")
+ }
+
+ automaton := &lr0Automaton{
+ states: map[kernelID]*lr0State{},
+ }
+
+ currentState := stateNumInitial
+ knownKernels := map[kernelID]struct{}{}
+ uncheckedKernels := []*kernel{}
+
+ // Generate an initial kernel.
+ {
+ prods, _ := prods.findByLHS(startSym)
+ initialItem, err := newLR0Item(prods[0], 0)
+ if err != nil {
+ return nil, err
+ }
+
+ k, err := newKernel([]*lr0Item{initialItem})
+ if err != nil {
+ return nil, err
+ }
+
+ automaton.initialState = k.id
+ knownKernels[k.id] = struct{}{}
+ uncheckedKernels = append(uncheckedKernels, k)
+ }
+
+ for len(uncheckedKernels) > 0 {
+ nextUncheckedKernels := []*kernel{}
+ for _, k := range uncheckedKernels {
+ state, neighbours, err := genStateAndNeighbourKernels(k, prods)
+ if err != nil {
+ return nil, err
+ }
+ state.num = currentState
+ currentState = currentState.next()
+
+ automaton.states[state.id] = state
+
+ for _, k := range neighbours {
+ if _, known := knownKernels[k.id]; known {
+ continue
+ }
+ knownKernels[k.id] = struct{}{}
+ nextUncheckedKernels = append(nextUncheckedKernels, k)
+ }
+ }
+ uncheckedKernels = nextUncheckedKernels
+ }
+
+ return automaton, nil
+}
+
+func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lr0State, []*kernel, error) {
+ items, err := genClosure(k, prods)
+ if err != nil {
+ return nil, nil, err
+ }
+ neighbours, err := genNeighbourKernels(items, prods)
+ if err != nil {
+ return nil, nil, err
+ }
+
+ next := map[symbol]kernelID{}
+ kernels := []*kernel{}
+ for _, n := range neighbours {
+ next[n.symbol] = n.kernel.id
+ kernels = append(kernels, n.kernel)
+ }
+
+ reducible := map[productionID]struct{}{}
+ for _, item := range items {
+ if item.reducible {
+ reducible[item.prod] = struct{}{}
+ }
+ }
+
+ return &lr0State{
+ kernel: k,
+ next: next,
+ reducible: reducible,
+ }, kernels, nil
+}
+
+func genClosure(k *kernel, prods *productionSet) ([]*lr0Item, error) {
+ items := []*lr0Item{}
+ knownItems := map[lr0ItemID]struct{}{}
+ uncheckedItems := []*lr0Item{}
+ for _, item := range k.items {
+ items = append(items, item)
+ uncheckedItems = append(uncheckedItems, item)
+ }
+ for len(uncheckedItems) > 0 {
+ nextUncheckedItems := []*lr0Item{}
+ for _, item := range uncheckedItems {
+ if item.dottedSymbol.isTerminal() {
+ continue
+ }
+
+ ps, _ := prods.findByLHS(item.dottedSymbol)
+ for _, prod := range ps {
+ item, err := newLR0Item(prod, 0)
+ if err != nil {
+ return nil, err
+ }
+ if _, exist := knownItems[item.id]; exist {
+ continue
+ }
+ items = append(items, item)
+ knownItems[item.id] = struct{}{}
+ nextUncheckedItems = append(nextUncheckedItems, item)
+ }
+ }
+ uncheckedItems = nextUncheckedItems
+ }
+
+ return items, nil
+}
+
+type neighbourKernel struct {
+ symbol symbol
+ kernel *kernel
+}
+
+func genNeighbourKernels(items []*lr0Item, prods *productionSet) ([]*neighbourKernel, error) {
+ kItemMap := map[symbol][]*lr0Item{}
+ for _, item := range items {
+ if item.dottedSymbol.isNil() {
+ continue
+ }
+ prod, ok := prods.findByID(item.prod)
+ if !ok {
+ return nil, fmt.Errorf("a production was not found: %v", item.prod)
+ }
+ kItem, err := newLR0Item(prod, item.dot+1)
+ if err != nil {
+ return nil, err
+ }
+ kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem)
+ }
+
+ nextSyms := []symbol{}
+ for sym := range kItemMap {
+ nextSyms = append(nextSyms, sym)
+ }
+ sort.Slice(nextSyms, func(i, j int) bool {
+ return nextSyms[i] < nextSyms[j]
+ })
+
+ kernels := []*neighbourKernel{}
+ for _, sym := range nextSyms {
+ k, err := newKernel(kItemMap[sym])
+ if err != nil {
+ return nil, err
+ }
+ kernels = append(kernels, &neighbourKernel{
+ symbol: sym,
+ kernel: k,
+ })
+ }
+
+ return kernels, nil
+}