diff options
Diffstat (limited to 'grammar/lexical/dfa')
-rw-r--r-- | grammar/lexical/dfa/dfa.go | 173 | ||||
-rw-r--r-- | grammar/lexical/dfa/dfa_test.go | 121 | ||||
-rw-r--r-- | grammar/lexical/dfa/symbol_position.go | 182 | ||||
-rw-r--r-- | grammar/lexical/dfa/symbol_position_test.go | 79 | ||||
-rw-r--r-- | grammar/lexical/dfa/tree.go | 567 | ||||
-rw-r--r-- | grammar/lexical/dfa/tree_test.go | 257 |
6 files changed, 0 insertions, 1379 deletions
diff --git a/grammar/lexical/dfa/dfa.go b/grammar/lexical/dfa/dfa.go deleted file mode 100644 index 884b168..0000000 --- a/grammar/lexical/dfa/dfa.go +++ /dev/null @@ -1,173 +0,0 @@ -package dfa - -import ( - "sort" - - spec "spec/grammar" -) - -type symbolTable struct { - symPos2Byte map[symbolPosition]byteRange - endPos2ID map[symbolPosition]spec.LexModeKindID -} - -func genSymbolTable(root byteTree) *symbolTable { - symTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{}, - endPos2ID: map[symbolPosition]spec.LexModeKindID{}, - } - return genSymTab(symTab, root) -} - -func genSymTab(symTab *symbolTable, node byteTree) *symbolTable { - if node == nil { - return symTab - } - - switch n := node.(type) { - case *symbolNode: - symTab.symPos2Byte[n.pos] = byteRange{ - from: n.from, - to: n.to, - } - case *endMarkerNode: - symTab.endPos2ID[n.pos] = n.id - default: - left, right := node.children() - genSymTab(symTab, left) - genSymTab(symTab, right) - } - return symTab -} - -type DFA struct { - States []string - InitialState string - AcceptingStatesTable map[string]spec.LexModeKindID - TransitionTable map[string][256]string -} - -func GenDFA(root byteTree, symTab *symbolTable) *DFA { - initialState := root.first() - initialStateHash := initialState.hash() - stateMap := map[string]*symbolPositionSet{ - initialStateHash: initialState, - } - tranTab := map[string][256]string{} - { - follow := genFollowTable(root) - unmarkedStates := map[string]*symbolPositionSet{ - initialStateHash: initialState, - } - for len(unmarkedStates) > 0 { - nextUnmarkedStates := map[string]*symbolPositionSet{} - for hash, state := range unmarkedStates { - tranTabOfState := [256]*symbolPositionSet{} - for _, pos := range state.set() { - if pos.isEndMark() { - continue - } - valRange := symTab.symPos2Byte[pos] - for symVal := valRange.from; symVal <= valRange.to; symVal++ { - if tranTabOfState[symVal] == nil { - tranTabOfState[symVal] = newSymbolPositionSet() - } - tranTabOfState[symVal].merge(follow[pos]) - } - } - for _, t := range tranTabOfState { - if t == nil { - continue - } - h := t.hash() - if _, ok := stateMap[h]; ok { - continue - } - stateMap[h] = t - nextUnmarkedStates[h] = t - } - tabOfState := [256]string{} - for v, t := range tranTabOfState { - if t == nil { - continue - } - tabOfState[v] = t.hash() - } - tranTab[hash] = tabOfState - } - unmarkedStates = nextUnmarkedStates - } - } - - accTab := map[string]spec.LexModeKindID{} - { - for h, s := range stateMap { - for _, pos := range s.set() { - if !pos.isEndMark() { - continue - } - priorID, ok := accTab[h] - if !ok { - accTab[h] = symTab.endPos2ID[pos] - } else { - id := symTab.endPos2ID[pos] - if id < priorID { - accTab[h] = id - } - } - } - } - } - - var states []string - { - for s := range stateMap { - states = append(states, s) - } - sort.Slice(states, func(i, j int) bool { - return states[i] < states[j] - }) - } - - return &DFA{ - States: states, - InitialState: initialStateHash, - AcceptingStatesTable: accTab, - TransitionTable: tranTab, - } -} - -func GenTransitionTable(dfa *DFA) (*spec.TransitionTable, error) { - stateHash2ID := map[string]spec.StateID{} - for i, s := range dfa.States { - // Since 0 represents an invalid value in a transition table, - // assign a number greater than or equal to 1 to states. - stateHash2ID[s] = spec.StateID(i + spec.StateIDMin.Int()) - } - - acc := make([]spec.LexModeKindID, len(dfa.States)+1) - for _, s := range dfa.States { - id, ok := dfa.AcceptingStatesTable[s] - if !ok { - continue - } - acc[stateHash2ID[s]] = id - } - - rowCount := len(dfa.States) + 1 - colCount := 256 - tran := make([]spec.StateID, rowCount*colCount) - for s, tab := range dfa.TransitionTable { - for v, to := range tab { - tran[stateHash2ID[s].Int()*256+v] = stateHash2ID[to] - } - } - - return &spec.TransitionTable{ - InitialStateID: stateHash2ID[dfa.InitialState], - AcceptingStates: acc, - UncompressedTransition: tran, - RowCount: rowCount, - ColCount: colCount, - }, nil -} diff --git a/grammar/lexical/dfa/dfa_test.go b/grammar/lexical/dfa/dfa_test.go deleted file mode 100644 index 9af9aeb..0000000 --- a/grammar/lexical/dfa/dfa_test.go +++ /dev/null @@ -1,121 +0,0 @@ -package dfa - -import ( - "strings" - "testing" - - "grammar/lexical/parser" - spec "spec/grammar" -) - -func TestGenDFA(t *testing.T) { - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - dfa := GenDFA(bt, symTab) - if dfa == nil { - t.Fatalf("DFA is nil") - } - - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)) - s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4)) - s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5)) - s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6)) - - rune2Int := func(char rune, index int) uint8 { - return uint8([]byte(string(char))[index]) - } - - tranS0 := [256]string{} - tranS0[rune2Int('a', 0)] = s1.hash() - tranS0[rune2Int('b', 0)] = s0.hash() - - tranS1 := [256]string{} - tranS1[rune2Int('a', 0)] = s1.hash() - tranS1[rune2Int('b', 0)] = s2.hash() - - tranS2 := [256]string{} - tranS2[rune2Int('a', 0)] = s1.hash() - tranS2[rune2Int('b', 0)] = s3.hash() - - tranS3 := [256]string{} - tranS3[rune2Int('a', 0)] = s1.hash() - tranS3[rune2Int('b', 0)] = s0.hash() - - expectedTranTab := map[string][256]string{ - s0.hash(): tranS0, - s1.hash(): tranS1, - s2.hash(): tranS2, - s3.hash(): tranS3, - } - if len(dfa.TransitionTable) != len(expectedTranTab) { - t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable)) - } - for h, eTranTab := range expectedTranTab { - tranTab, ok := dfa.TransitionTable[h] - if !ok { - t.Errorf("no entry; hash: %v", h) - continue - } - if len(tranTab) != len(eTranTab) { - t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab)) - } - for c, eNext := range eTranTab { - if eNext == "" { - continue - } - - next := tranTab[c] - if next == "" { - t.Errorf("no enatry: hash: %v, char: %v", h, c) - } - if next != eNext { - t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next) - } - } - } - - if dfa.InitialState != s0.hash() { - t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState) - } - - accTab := map[string]spec.LexModeKindID{ - s3.hash(): 1, - } - if len(dfa.AcceptingStatesTable) != len(accTab) { - t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable)) - } - for eState, eID := range accTab { - id, ok := dfa.AcceptingStatesTable[eState] - if !ok { - t.Errorf("accepting state is not found: state: %v", eState) - } - if id != eID { - t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id) - } - } -} diff --git a/grammar/lexical/dfa/symbol_position.go b/grammar/lexical/dfa/symbol_position.go deleted file mode 100644 index f154251..0000000 --- a/grammar/lexical/dfa/symbol_position.go +++ /dev/null @@ -1,182 +0,0 @@ -package dfa - -import ( - "encoding/binary" - "fmt" - "strings" -) - -type symbolPosition uint16 - -const ( - symbolPositionNil symbolPosition = 0x0000 - - symbolPositionMin uint16 = 0x0001 - symbolPositionMax uint16 = 0x7fff - - symbolPositionMaskSymbol uint16 = 0x0000 - symbolPositionMaskEndMark uint16 = 0x8000 - - symbolPositionMaskValue uint16 = 0x7fff -) - -func newSymbolPosition(n uint16, endMark bool) (symbolPosition, error) { - if n < symbolPositionMin || n > symbolPositionMax { - return symbolPositionNil, fmt.Errorf("symbol position must be within %v to %v: n: %v, endMark: %v", symbolPositionMin, symbolPositionMax, n, endMark) - } - if endMark { - return symbolPosition(n | symbolPositionMaskEndMark), nil - } - return symbolPosition(n | symbolPositionMaskSymbol), nil -} - -func (p symbolPosition) String() string { - if p.isEndMark() { - return fmt.Sprintf("end#%v", uint16(p)&symbolPositionMaskValue) - } - return fmt.Sprintf("sym#%v", uint16(p)&symbolPositionMaskValue) -} - -func (p symbolPosition) isEndMark() bool { - return uint16(p)&symbolPositionMaskEndMark > 1 -} - -func (p symbolPosition) describe() (uint16, bool) { - v := uint16(p) & symbolPositionMaskValue - if p.isEndMark() { - return v, true - } - return v, false -} - -type symbolPositionSet struct { - // `s` represents a set of symbol positions. - // However, immediately after adding a symbol position, the elements may be duplicated. - // When you need an aligned set with no duplicates, you can get such value via the set function. - s []symbolPosition - sorted bool -} - -func newSymbolPositionSet() *symbolPositionSet { - return &symbolPositionSet{ - s: []symbolPosition{}, - sorted: false, - } -} - -func (s *symbolPositionSet) String() string { - if len(s.s) <= 0 { - return "{}" - } - ps := s.sortAndRemoveDuplicates() - var b strings.Builder - fmt.Fprintf(&b, "{") - for i, p := range ps { - if i <= 0 { - fmt.Fprintf(&b, "%v", p) - continue - } - fmt.Fprintf(&b, ", %v", p) - } - fmt.Fprintf(&b, "}") - return b.String() -} - -func (s *symbolPositionSet) set() []symbolPosition { - s.sortAndRemoveDuplicates() - return s.s -} - -func (s *symbolPositionSet) add(pos symbolPosition) *symbolPositionSet { - s.s = append(s.s, pos) - s.sorted = false - return s -} - -func (s *symbolPositionSet) merge(t *symbolPositionSet) *symbolPositionSet { - s.s = append(s.s, t.s...) - s.sorted = false - return s -} - -func (s *symbolPositionSet) hash() string { - if len(s.s) <= 0 { - return "" - } - sorted := s.sortAndRemoveDuplicates() - var buf []byte - for _, p := range sorted { - b := make([]byte, 8) - binary.PutUvarint(b, uint64(p)) - buf = append(buf, b...) - } - // Convert to a string to be able to use it as a key of a map. - // But note this byte sequence is made from values of symbol positions, - // so this is not a well-formed UTF-8 sequence. - return string(buf) -} - -func (s *symbolPositionSet) sortAndRemoveDuplicates() []symbolPosition { - if s.sorted { - return s.s - } - - sortSymbolPositions(s.s, 0, len(s.s)-1) - - // Remove duplicates. - lastV := s.s[0] - nextIdx := 1 - for _, v := range s.s[1:] { - if v == lastV { - continue - } - s.s[nextIdx] = v - nextIdx++ - lastV = v - } - s.s = s.s[:nextIdx] - s.sorted = true - - return s.s -} - -// sortSymbolPositions sorts a slice of symbol positions as it uses quick sort. -func sortSymbolPositions(ps []symbolPosition, left, right int) { - if left >= right { - return - } - var pivot symbolPosition - { - // Use a median as a pivot. - p1 := ps[left] - p2 := ps[(left+right)/2] - p3 := ps[right] - if p1 > p2 { - p1, p2 = p2, p1 - } - if p2 > p3 { - p2 = p3 - if p1 > p2 { - p2 = p1 - } - } - pivot = p2 - } - i := left - j := right - for i <= j { - for ps[i] < pivot { - i++ - } - for ps[j] > pivot { - j-- - } - if i <= j { - ps[i], ps[j] = ps[j], ps[i] - i++ - j-- - } - } - sortSymbolPositions(ps, left, j) - sortSymbolPositions(ps, i, right) -} diff --git a/grammar/lexical/dfa/symbol_position_test.go b/grammar/lexical/dfa/symbol_position_test.go deleted file mode 100644 index c867f64..0000000 --- a/grammar/lexical/dfa/symbol_position_test.go +++ /dev/null @@ -1,79 +0,0 @@ -package dfa - -import ( - "fmt" - "testing" -) - -func TestNewSymbolPosition(t *testing.T) { - tests := []struct { - n uint16 - endMark bool - err bool - }{ - { - n: 0, - endMark: false, - err: true, - }, - { - n: 0, - endMark: true, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMin - 1, - endMark: true, - err: true, - }, - { - n: symbolPositionMin, - endMark: false, - }, - { - n: symbolPositionMin, - endMark: true, - }, - { - n: symbolPositionMax, - endMark: false, - }, - { - n: symbolPositionMax, - endMark: true, - }, - { - n: symbolPositionMax + 1, - endMark: false, - err: true, - }, - { - n: symbolPositionMax + 1, - endMark: true, - err: true, - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) { - pos, err := newSymbolPosition(tt.n, tt.endMark) - if tt.err { - if err == nil { - t.Fatal("err is nil") - } - return - } - if err != nil { - t.Fatal(err) - } - n, endMark := pos.describe() - if n != tt.n || endMark != tt.endMark { - t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark) - } - }) - } -} diff --git a/grammar/lexical/dfa/tree.go b/grammar/lexical/dfa/tree.go deleted file mode 100644 index 85061f9..0000000 --- a/grammar/lexical/dfa/tree.go +++ /dev/null @@ -1,567 +0,0 @@ -package dfa - -import ( - "fmt" - "io" - "sort" - - "grammar/lexical/parser" - spec "spec/grammar" - "utf8" -) - -type byteTree interface { - fmt.Stringer - children() (byteTree, byteTree) - nullable() bool - first() *symbolPositionSet - last() *symbolPositionSet - clone() byteTree -} - -var ( - _ byteTree = &symbolNode{} - _ byteTree = &endMarkerNode{} - _ byteTree = &concatNode{} - _ byteTree = &altNode{} - _ byteTree = &repeatNode{} - _ byteTree = &optionNode{} -) - -type byteRange struct { - from byte - to byte -} - -type symbolNode struct { - byteRange - pos symbolPosition - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newSymbolNode(value byte) *symbolNode { - return &symbolNode{ - byteRange: byteRange{ - from: value, - to: value, - }, - pos: symbolPositionNil, - } -} - -func newRangeSymbolNode(from, to byte) *symbolNode { - return &symbolNode{ - byteRange: byteRange{ - from: from, - to: to, - }, - pos: symbolPositionNil, - } -} - -func (n *symbolNode) String() string { - return fmt.Sprintf("symbol: value: %v-%v, pos: %v", n.from, n.to, n.pos) -} - -func (n *symbolNode) children() (byteTree, byteTree) { - return nil, nil -} - -func (n *symbolNode) nullable() bool { - return false -} - -func (n *symbolNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.add(n.pos) - } - return n.firstMemo -} - -func (n *symbolNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.add(n.pos) - } - return n.lastMemo -} - -func (n *symbolNode) clone() byteTree { - return newRangeSymbolNode(n.from, n.to) -} - -type endMarkerNode struct { - id spec.LexModeKindID - pos symbolPosition - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newEndMarkerNode(id spec.LexModeKindID) *endMarkerNode { - return &endMarkerNode{ - id: id, - pos: symbolPositionNil, - } -} - -func (n *endMarkerNode) String() string { - return fmt.Sprintf("end: pos: %v", n.pos) -} - -func (n *endMarkerNode) children() (byteTree, byteTree) { - return nil, nil -} - -func (n *endMarkerNode) nullable() bool { - return false -} - -func (n *endMarkerNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.add(n.pos) - } - return n.firstMemo -} - -func (n *endMarkerNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.add(n.pos) - } - return n.lastMemo -} - -func (n *endMarkerNode) clone() byteTree { - return newEndMarkerNode(n.id) -} - -type concatNode struct { - left byteTree - right byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newConcatNode(left, right byteTree) *concatNode { - return &concatNode{ - left: left, - right: right, - } -} - -func (n *concatNode) String() string { - return "concat" -} - -func (n *concatNode) children() (byteTree, byteTree) { - return n.left, n.right -} - -func (n *concatNode) nullable() bool { - return n.left.nullable() && n.right.nullable() -} - -func (n *concatNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - if n.left.nullable() { - n.firstMemo.merge(n.right.first()) - } - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *concatNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.right.last()) - if n.right.nullable() { - n.lastMemo.merge(n.left.last()) - } - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *concatNode) clone() byteTree { - return newConcatNode(n.left.clone(), n.right.clone()) -} - -type altNode struct { - left byteTree - right byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newAltNode(left, right byteTree) *altNode { - return &altNode{ - left: left, - right: right, - } -} - -func (n *altNode) String() string { - return "alt" -} - -func (n *altNode) children() (byteTree, byteTree) { - return n.left, n.right -} - -func (n *altNode) nullable() bool { - return n.left.nullable() || n.right.nullable() -} - -func (n *altNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - n.firstMemo.merge(n.right.first()) - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *altNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.left.last()) - n.lastMemo.merge(n.right.last()) - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *altNode) clone() byteTree { - return newAltNode(n.left.clone(), n.right.clone()) -} - -type repeatNode struct { - left byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newRepeatNode(left byteTree) *repeatNode { - return &repeatNode{ - left: left, - } -} - -func (n *repeatNode) String() string { - return "repeat" -} - -func (n *repeatNode) children() (byteTree, byteTree) { - return n.left, nil -} - -func (n *repeatNode) nullable() bool { - return true -} - -func (n *repeatNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *repeatNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.left.last()) - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *repeatNode) clone() byteTree { - return newRepeatNode(n.left.clone()) -} - -type optionNode struct { - left byteTree - firstMemo *symbolPositionSet - lastMemo *symbolPositionSet -} - -func newOptionNode(left byteTree) *optionNode { - return &optionNode{ - left: left, - } -} - -func (n *optionNode) String() string { - return "option" -} - -func (n *optionNode) children() (byteTree, byteTree) { - return n.left, nil -} - -func (n *optionNode) nullable() bool { - return true -} - -func (n *optionNode) first() *symbolPositionSet { - if n.firstMemo == nil { - n.firstMemo = newSymbolPositionSet() - n.firstMemo.merge(n.left.first()) - n.firstMemo.sortAndRemoveDuplicates() - } - return n.firstMemo -} - -func (n *optionNode) last() *symbolPositionSet { - if n.lastMemo == nil { - n.lastMemo = newSymbolPositionSet() - n.lastMemo.merge(n.left.last()) - n.lastMemo.sortAndRemoveDuplicates() - } - return n.lastMemo -} - -func (n *optionNode) clone() byteTree { - return newOptionNode(n.left.clone()) -} - -type followTable map[symbolPosition]*symbolPositionSet - -func genFollowTable(root byteTree) followTable { - follow := followTable{} - calcFollow(follow, root) - return follow -} - -func calcFollow(follow followTable, ast byteTree) { - if ast == nil { - return - } - left, right := ast.children() - calcFollow(follow, left) - calcFollow(follow, right) - switch n := ast.(type) { - case *concatNode: - l, r := n.children() - for _, p := range l.last().set() { - if _, ok := follow[p]; !ok { - follow[p] = newSymbolPositionSet() - } - follow[p].merge(r.first()) - } - case *repeatNode: - for _, p := range n.last().set() { - if _, ok := follow[p]; !ok { - follow[p] = newSymbolPositionSet() - } - follow[p].merge(n.first()) - } - } -} - -func positionSymbols(node byteTree, n uint16) (uint16, error) { - if node == nil { - return n, nil - } - - l, r := node.children() - p := n - p, err := positionSymbols(l, p) - if err != nil { - return p, err - } - p, err = positionSymbols(r, p) - if err != nil { - return p, err - } - switch n := node.(type) { - case *symbolNode: - n.pos, err = newSymbolPosition(p, false) - if err != nil { - return p, err - } - p++ - case *endMarkerNode: - n.pos, err = newSymbolPosition(p, true) - if err != nil { - return p, err - } - p++ - } - node.first() - node.last() - return p, nil -} - -func concat(ts ...byteTree) byteTree { - nonNilNodes := []byteTree{} - for _, t := range ts { - if t == nil { - continue - } - nonNilNodes = append(nonNilNodes, t) - } - if len(nonNilNodes) <= 0 { - return nil - } - if len(nonNilNodes) == 1 { - return nonNilNodes[0] - } - concat := newConcatNode(nonNilNodes[0], nonNilNodes[1]) - for _, t := range nonNilNodes[2:] { - concat = newConcatNode(concat, t) - } - return concat -} - -func oneOf(ts ...byteTree) byteTree { - nonNilNodes := []byteTree{} - for _, t := range ts { - if t == nil { - continue - } - nonNilNodes = append(nonNilNodes, t) - } - if len(nonNilNodes) <= 0 { - return nil - } - if len(nonNilNodes) == 1 { - return nonNilNodes[0] - } - alt := newAltNode(nonNilNodes[0], nonNilNodes[1]) - for _, t := range nonNilNodes[2:] { - alt = newAltNode(alt, t) - } - return alt -} - -//nolint:unused -func printByteTree(w io.Writer, t byteTree, ruledLine string, childRuledLinePrefix string, withAttrs bool) { - if t == nil { - return - } - fmt.Fprintf(w, "%v%v", ruledLine, t) - if withAttrs { - fmt.Fprintf(w, ", nullable: %v, first: %v, last: %v", t.nullable(), t.first(), t.last()) - } - fmt.Fprintf(w, "\n") - left, right := t.children() - children := []byteTree{} - if left != nil { - children = append(children, left) - } - if right != nil { - children = append(children, right) - } - num := len(children) - for i, child := range children { - line := "└─ " - if num > 1 { - if i == 0 { - line = "├─ " - } else if i < num-1 { - line = "│ " - } - } - prefix := "│ " - if i >= num-1 { - prefix = " " - } - printByteTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix, withAttrs) - } -} - -func ConvertCPTreeToByteTree(cpTrees map[spec.LexModeKindID]parser.CPTree) (byteTree, *symbolTable, error) { - var ids []spec.LexModeKindID - for id := range cpTrees { - ids = append(ids, id) - } - sort.Slice(ids, func(i, j int) bool { - return ids[i] < ids[j] - }) - - var bt byteTree - for _, id := range ids { - cpTree := cpTrees[id] - t, err := convCPTreeToByteTree(cpTree) - if err != nil { - return nil, nil, err - } - bt = oneOf(bt, concat(t, newEndMarkerNode(id))) - } - _, err := positionSymbols(bt, symbolPositionMin) - if err != nil { - return nil, nil, err - } - - return bt, genSymbolTable(bt), nil -} - -func convCPTreeToByteTree(cpTree parser.CPTree) (byteTree, error) { - if from, to, ok := cpTree.Range(); ok { - bs, err := utf8.GenCharBlocks(from, to) - if err != nil { - return nil, err - } - var a byteTree - for _, b := range bs { - var c byteTree - for i := 0; i < len(b.From); i++ { - c = concat(c, newRangeSymbolNode(b.From[i], b.To[i])) - } - a = oneOf(a, c) - } - return a, nil - } - - if tree, ok := cpTree.Repeatable(); ok { - t, err := convCPTreeToByteTree(tree) - if err != nil { - return nil, err - } - return newRepeatNode(t), nil - } - - if tree, ok := cpTree.Optional(); ok { - t, err := convCPTreeToByteTree(tree) - if err != nil { - return nil, err - } - return newOptionNode(t), nil - } - - if left, right, ok := cpTree.Concatenation(); ok { - l, err := convCPTreeToByteTree(left) - if err != nil { - return nil, err - } - r, err := convCPTreeToByteTree(right) - if err != nil { - return nil, err - } - return newConcatNode(l, r), nil - } - - if left, right, ok := cpTree.Alternatives(); ok { - l, err := convCPTreeToByteTree(left) - if err != nil { - return nil, err - } - r, err := convCPTreeToByteTree(right) - if err != nil { - return nil, err - } - return newAltNode(l, r), nil - } - - return nil, fmt.Errorf("invalid tree type: %T", cpTree) -} diff --git a/grammar/lexical/dfa/tree_test.go b/grammar/lexical/dfa/tree_test.go deleted file mode 100644 index 188fe95..0000000 --- a/grammar/lexical/dfa/tree_test.go +++ /dev/null @@ -1,257 +0,0 @@ -package dfa - -import ( - "fmt" - "strings" - "testing" - - "grammar/lexical/parser" - spec "spec/grammar" -) - -func TestByteTree(t *testing.T) { - tests := []struct { - root byteTree - nullable bool - first *symbolPositionSet - last *symbolPositionSet - }{ - { - root: newSymbolNodeWithPos(0, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newEndMarkerNodeWithPos(1, 1), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(2), - }, - { - root: newConcatNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: false, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newConcatNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newSymbolNodeWithPos(0, 2), - ), - nullable: false, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newSymbolNodeWithPos(0, 2), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newSymbolNodeWithPos(0, 1), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newAltNode( - newRepeatNode(newSymbolNodeWithPos(0, 1)), - newRepeatNode(newSymbolNodeWithPos(0, 2)), - ), - nullable: true, - first: newSymbolPositionSet().add(1).add(2), - last: newSymbolPositionSet().add(1).add(2), - }, - { - root: newRepeatNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - { - root: newOptionNode(newSymbolNodeWithPos(0, 1)), - nullable: true, - first: newSymbolPositionSet().add(1), - last: newSymbolPositionSet().add(1), - }, - } - for i, tt := range tests { - t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) { - if tt.root.nullable() != tt.nullable { - t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable()) - } - if tt.first.hash() != tt.root.first().hash() { - t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first()) - } - if tt.last.hash() != tt.root.last().hash() { - t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last()) - } - }) - } -} - -func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode { - n := newSymbolNode(v) - n.pos = pos - return n -} - -func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode { - n := newEndMarkerNode(spec.LexModeKindID(id)) - n.pos = pos - return n -} - -func TestFollowAndSymbolTable(t *testing.T) { - symPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, false) - if err != nil { - panic(err) - } - return pos - } - - endPos := func(n uint16) symbolPosition { - pos, err := newSymbolPosition(n, true) - if err != nil { - panic(err) - } - return pos - } - - p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb")) - cpt, err := p.Parse() - if err != nil { - t.Fatal(err) - } - - bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{ - spec.LexModeKindIDMin: cpt, - }) - if err != nil { - t.Fatal(err) - } - - { - followTab := genFollowTable(bt) - if followTab == nil { - t.Fatal("follow table is nil") - } - expectedFollowTab := followTable{ - 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)), - 3: newSymbolPositionSet().add(symPos(4)), - 4: newSymbolPositionSet().add(symPos(5)), - 5: newSymbolPositionSet().add(endPos(6)), - } - testFollowTable(t, expectedFollowTab, followTab) - } - - { - entry := func(v byte) byteRange { - return byteRange{ - from: v, - to: v, - } - } - - expectedSymTab := &symbolTable{ - symPos2Byte: map[symbolPosition]byteRange{ - symPos(1): entry(byte('a')), - symPos(2): entry(byte('b')), - symPos(3): entry(byte('a')), - symPos(4): entry(byte('b')), - symPos(5): entry(byte('b')), - }, - endPos2ID: map[symbolPosition]spec.LexModeKindID{ - endPos(6): 1, - }, - } - testSymbolTable(t, expectedSymTab, symTab) - } -} - -func testFollowTable(t *testing.T, expected, actual followTable) { - if len(actual) != len(expected) { - t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual)) - } - for ePos, eSet := range expected { - aSet, ok := actual[ePos] - if !ok { - t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet) - } - if aSet.hash() != eSet.hash() { - t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet) - } - } -} - -func testSymbolTable(t *testing.T, expected, actual *symbolTable) { - t.Helper() - - if len(actual.symPos2Byte) != len(expected.symPos2Byte) { - t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte)) - } - for ePos, eByte := range expected.symPos2Byte { - byte, ok := actual.symPos2Byte[ePos] - if !ok { - t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte) - continue - } - if byte.from != eByte.from || byte.to != eByte.to { - t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte) - } - } - - if len(actual.endPos2ID) != len(expected.endPos2ID) { - t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID)) - } - for ePos, eID := range expected.endPos2ID { - id, ok := actual.endPos2ID[ePos] - if !ok { - t.Errorf("an end position entry is not found: %v -> %v", ePos, eID) - continue - } - if id != eID { - t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id) - } - } -} |