aboutsummaryrefslogtreecommitdiff
path: root/grammar/lr0_item.go
diff options
context:
space:
mode:
Diffstat (limited to 'grammar/lr0_item.go')
-rw-r--r--grammar/lr0_item.go343
1 files changed, 0 insertions, 343 deletions
diff --git a/grammar/lr0_item.go b/grammar/lr0_item.go
deleted file mode 100644
index 2934061..0000000
--- a/grammar/lr0_item.go
+++ /dev/null
@@ -1,343 +0,0 @@
-package grammar
-
-import (
- "crypto/sha256"
- "encoding/binary"
- "fmt"
- "sort"
- "strconv"
-)
-
-type lr0ItemID [32]byte
-
-func (id lr0ItemID) String() string {
- return fmt.Sprintf("%x", id.num())
-}
-
-func (id lr0ItemID) num() uint32 {
- return binary.LittleEndian.Uint32(id[:])
-}
-
-type lr0Item struct {
- id lr0ItemID
- prod productionID
-
- // E → E + T
- //
- // Dot | Dotted Symbol | Item
- // ----+---------------+------------
- // 0 | E | E →・E + T
- // 1 | + | E → E・+ T
- // 2 | T | E → E +・T
- // 3 | Nil | E → E + T・
- dot int
- dottedSymbol symbol
-
- // When initial is true, the LHS of the production is the augmented start symbol and dot is 0.
- // It looks like S' →・S.
- initial bool
-
- // When reducible is true, the item looks like E → E + T・.
- reducible bool
-
- // When kernel is true, the item is kernel item.
- kernel bool
-}
-
-func newLR0Item(prod *production, dot int) (*lr0Item, error) {
- if prod == nil {
- return nil, fmt.Errorf("production must be non-nil")
- }
-
- if dot < 0 || dot > prod.rhsLen {
- return nil, fmt.Errorf("dot must be between 0 and %v", prod.rhsLen)
- }
-
- var id lr0ItemID
- {
- b := []byte{}
- b = append(b, prod.id[:]...)
- bDot := make([]byte, 8)
- binary.LittleEndian.PutUint64(bDot, uint64(dot))
- b = append(b, bDot...)
- id = sha256.Sum256(b)
- }
-
- dottedSymbol := symbolNil
- if dot < prod.rhsLen {
- dottedSymbol = prod.rhs[dot]
- }
-
- initial := false
- if prod.lhs.isStart() && dot == 0 {
- initial = true
- }
-
- reducible := false
- if dot == prod.rhsLen {
- reducible = true
- }
-
- kernel := false
- if initial || dot > 0 {
- kernel = true
- }
-
- item := &lr0Item{
- id: id,
- prod: prod.id,
- dot: dot,
- dottedSymbol: dottedSymbol,
- initial: initial,
- reducible: reducible,
- kernel: kernel,
- }
-
- return item, nil
-}
-
-type kernelID [32]byte
-
-func (id kernelID) String() string {
- return fmt.Sprintf("%x", binary.LittleEndian.Uint32(id[:]))
-}
-
-type kernel struct {
- id kernelID
- items []*lr0Item
-}
-
-func newKernel(items []*lr0Item) (*kernel, error) {
- if len(items) == 0 {
- return nil, fmt.Errorf("a kernel need at least one item")
- }
-
- // Remove duplicates from items.
- var sortedItems []*lr0Item
- {
- m := map[lr0ItemID]*lr0Item{}
- for _, item := range items {
- if !item.kernel {
- return nil, fmt.Errorf("not a kernel item: %v", item)
- }
- m[item.id] = item
- }
- sortedItems = []*lr0Item{}
- for _, item := range m {
- sortedItems = append(sortedItems, item)
- }
- sort.Slice(sortedItems, func(i, j int) bool {
- return sortedItems[i].id.num() < sortedItems[j].id.num()
- })
- }
-
- var id kernelID
- {
- b := []byte{}
- for _, item := range sortedItems {
- b = append(b, item.id[:]...)
- }
- id = sha256.Sum256(b)
- }
-
- return &kernel{
- id: id,
- items: sortedItems,
- }, nil
-}
-
-type stateNum int
-
-const stateNumInitial = stateNum(0)
-
-func (n stateNum) Int() int {
- return int(n)
-}
-
-func (n stateNum) String() string {
- return strconv.Itoa(int(n))
-}
-
-func (n stateNum) next() stateNum {
- return stateNum(n + 1)
-}
-
-type lr0State struct {
- *kernel
- num stateNum
- next map[symbol]kernelID
- reducible map[productionID]struct{}
-}
-
-type lr0Automaton struct {
- initialState kernelID
- states map[kernelID]*lr0State
-}
-
-func genLR0Automaton(prods *productionSet, startSym symbol) (*lr0Automaton, error) {
- if !startSym.isStart() {
- return nil, fmt.Errorf("passed symbold is not a start symbol")
- }
-
- automaton := &lr0Automaton{
- states: map[kernelID]*lr0State{},
- }
-
- currentState := stateNumInitial
- knownKernels := map[kernelID]struct{}{}
- uncheckedKernels := []*kernel{}
-
- // Generate an initial kernel.
- {
- prods, _ := prods.findByLHS(startSym)
- initialItem, err := newLR0Item(prods[0], 0)
- if err != nil {
- return nil, err
- }
-
- k, err := newKernel([]*lr0Item{initialItem})
- if err != nil {
- return nil, err
- }
-
- automaton.initialState = k.id
- knownKernels[k.id] = struct{}{}
- uncheckedKernels = append(uncheckedKernels, k)
- }
-
- for len(uncheckedKernels) > 0 {
- nextUncheckedKernels := []*kernel{}
- for _, k := range uncheckedKernels {
- state, neighbours, err := genStateAndNeighbourKernels(k, prods)
- if err != nil {
- return nil, err
- }
- state.num = currentState
- currentState = currentState.next()
-
- automaton.states[state.id] = state
-
- for _, k := range neighbours {
- if _, known := knownKernels[k.id]; known {
- continue
- }
- knownKernels[k.id] = struct{}{}
- nextUncheckedKernels = append(nextUncheckedKernels, k)
- }
- }
- uncheckedKernels = nextUncheckedKernels
- }
-
- return automaton, nil
-}
-
-func genStateAndNeighbourKernels(k *kernel, prods *productionSet) (*lr0State, []*kernel, error) {
- items, err := genClosure(k, prods)
- if err != nil {
- return nil, nil, err
- }
- neighbours, err := genNeighbourKernels(items, prods)
- if err != nil {
- return nil, nil, err
- }
-
- next := map[symbol]kernelID{}
- kernels := []*kernel{}
- for _, n := range neighbours {
- next[n.symbol] = n.kernel.id
- kernels = append(kernels, n.kernel)
- }
-
- reducible := map[productionID]struct{}{}
- for _, item := range items {
- if item.reducible {
- reducible[item.prod] = struct{}{}
- }
- }
-
- return &lr0State{
- kernel: k,
- next: next,
- reducible: reducible,
- }, kernels, nil
-}
-
-func genClosure(k *kernel, prods *productionSet) ([]*lr0Item, error) {
- items := []*lr0Item{}
- knownItems := map[lr0ItemID]struct{}{}
- uncheckedItems := []*lr0Item{}
- for _, item := range k.items {
- items = append(items, item)
- uncheckedItems = append(uncheckedItems, item)
- }
- for len(uncheckedItems) > 0 {
- nextUncheckedItems := []*lr0Item{}
- for _, item := range uncheckedItems {
- if item.dottedSymbol.isTerminal() {
- continue
- }
-
- ps, _ := prods.findByLHS(item.dottedSymbol)
- for _, prod := range ps {
- item, err := newLR0Item(prod, 0)
- if err != nil {
- return nil, err
- }
- if _, exist := knownItems[item.id]; exist {
- continue
- }
- items = append(items, item)
- knownItems[item.id] = struct{}{}
- nextUncheckedItems = append(nextUncheckedItems, item)
- }
- }
- uncheckedItems = nextUncheckedItems
- }
-
- return items, nil
-}
-
-type neighbourKernel struct {
- symbol symbol
- kernel *kernel
-}
-
-func genNeighbourKernels(items []*lr0Item, prods *productionSet) ([]*neighbourKernel, error) {
- kItemMap := map[symbol][]*lr0Item{}
- for _, item := range items {
- if item.dottedSymbol.isNil() {
- continue
- }
- prod, ok := prods.findByID(item.prod)
- if !ok {
- return nil, fmt.Errorf("a production was not found: %v", item.prod)
- }
- kItem, err := newLR0Item(prod, item.dot+1)
- if err != nil {
- return nil, err
- }
- kItemMap[item.dottedSymbol] = append(kItemMap[item.dottedSymbol], kItem)
- }
-
- nextSyms := []symbol{}
- for sym := range kItemMap {
- nextSyms = append(nextSyms, sym)
- }
- sort.Slice(nextSyms, func(i, j int) bool {
- return nextSyms[i] < nextSyms[j]
- })
-
- kernels := []*neighbourKernel{}
- for _, sym := range nextSyms {
- k, err := newKernel(kItemMap[sym])
- if err != nil {
- return nil, err
- }
- kernels = append(kernels, &neighbourKernel{
- symbol: sym,
- kernel: k,
- })
- }
-
- return kernels, nil
-}