aboutsummaryrefslogtreecommitdiff
path: root/tests/unit/grammar/lexical/dfa/dfa.go
diff options
context:
space:
mode:
Diffstat (limited to 'tests/unit/grammar/lexical/dfa/dfa.go')
-rw-r--r--tests/unit/grammar/lexical/dfa/dfa.go445
1 files changed, 445 insertions, 0 deletions
diff --git a/tests/unit/grammar/lexical/dfa/dfa.go b/tests/unit/grammar/lexical/dfa/dfa.go
new file mode 100644
index 0000000..3233969
--- /dev/null
+++ b/tests/unit/grammar/lexical/dfa/dfa.go
@@ -0,0 +1,445 @@
+package dfa
+
+import (
+ "fmt"
+ "strings"
+ "testing"
+
+ "urubu/grammar/lexical/parser"
+ spec "urubu/spec/grammar"
+)
+
+func TestGenDFA(t *testing.T) {
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+ dfa := GenDFA(bt, symTab)
+ if dfa == nil {
+ t.Fatalf("DFA is nil")
+ }
+
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ s0 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3))
+ s1 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(4))
+ s2 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(symPos(5))
+ s3 := newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)).add(endPos(6))
+
+ rune2Int := func(char rune, index int) uint8 {
+ return uint8([]byte(string(char))[index])
+ }
+
+ tranS0 := [256]string{}
+ tranS0[rune2Int('a', 0)] = s1.hash()
+ tranS0[rune2Int('b', 0)] = s0.hash()
+
+ tranS1 := [256]string{}
+ tranS1[rune2Int('a', 0)] = s1.hash()
+ tranS1[rune2Int('b', 0)] = s2.hash()
+
+ tranS2 := [256]string{}
+ tranS2[rune2Int('a', 0)] = s1.hash()
+ tranS2[rune2Int('b', 0)] = s3.hash()
+
+ tranS3 := [256]string{}
+ tranS3[rune2Int('a', 0)] = s1.hash()
+ tranS3[rune2Int('b', 0)] = s0.hash()
+
+ expectedTranTab := map[string][256]string{
+ s0.hash(): tranS0,
+ s1.hash(): tranS1,
+ s2.hash(): tranS2,
+ s3.hash(): tranS3,
+ }
+ if len(dfa.TransitionTable) != len(expectedTranTab) {
+ t.Errorf("transition table is mismatched: want: %v entries, got: %v entries", len(expectedTranTab), len(dfa.TransitionTable))
+ }
+ for h, eTranTab := range expectedTranTab {
+ tranTab, ok := dfa.TransitionTable[h]
+ if !ok {
+ t.Errorf("no entry; hash: %v", h)
+ continue
+ }
+ if len(tranTab) != len(eTranTab) {
+ t.Errorf("transition table is mismatched: hash: %v, want: %v entries, got: %v entries", h, len(eTranTab), len(tranTab))
+ }
+ for c, eNext := range eTranTab {
+ if eNext == "" {
+ continue
+ }
+
+ next := tranTab[c]
+ if next == "" {
+ t.Errorf("no enatry: hash: %v, char: %v", h, c)
+ }
+ if next != eNext {
+ t.Errorf("next state is mismatched: want: %v, got: %v", eNext, next)
+ }
+ }
+ }
+
+ if dfa.InitialState != s0.hash() {
+ t.Errorf("initial state is mismatched: want: %v, got: %v", s0.hash(), dfa.InitialState)
+ }
+
+ accTab := map[string]spec.LexModeKindID{
+ s3.hash(): 1,
+ }
+ if len(dfa.AcceptingStatesTable) != len(accTab) {
+ t.Errorf("accepting states are mismatched: want: %v entries, got: %v entries", len(accTab), len(dfa.AcceptingStatesTable))
+ }
+ for eState, eID := range accTab {
+ id, ok := dfa.AcceptingStatesTable[eState]
+ if !ok {
+ t.Errorf("accepting state is not found: state: %v", eState)
+ }
+ if id != eID {
+ t.Errorf("ID is mismatched: state: %v, want: %v, got: %v", eState, eID, id)
+ }
+ }
+}
+
+func TestNewSymbolPosition(t *testing.T) {
+ tests := []struct {
+ n uint16
+ endMark bool
+ err bool
+ }{
+ {
+ n: 0,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: 0,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMin - 1,
+ endMark: true,
+ err: true,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMin,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: false,
+ },
+ {
+ n: symbolPositionMax,
+ endMark: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: false,
+ err: true,
+ },
+ {
+ n: symbolPositionMax + 1,
+ endMark: true,
+ err: true,
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v n: %v, endMark: %v", i, tt.n, tt.endMark), func(t *testing.T) {
+ pos, err := newSymbolPosition(tt.n, tt.endMark)
+ if tt.err {
+ if err == nil {
+ t.Fatal("err is nil")
+ }
+ return
+ }
+ if err != nil {
+ t.Fatal(err)
+ }
+ n, endMark := pos.describe()
+ if n != tt.n || endMark != tt.endMark {
+ t.Errorf("unexpected symbol position: want: n: %v, endMark: %v, got: n: %v, endMark: %v", tt.n, tt.endMark, n, endMark)
+ }
+ })
+ }
+}
+
+func TestByteTree(t *testing.T) {
+ tests := []struct {
+ root byteTree
+ nullable bool
+ first *symbolPositionSet
+ last *symbolPositionSet
+ }{
+ {
+ root: newSymbolNodeWithPos(0, 1),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newEndMarkerNodeWithPos(1, 1),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newConcatNode(
+ newSymbolNodeWithPos(0, 1),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(2),
+ },
+ {
+ root: newConcatNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(2),
+ },
+ {
+ root: newConcatNode(
+ newSymbolNodeWithPos(0, 1),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newConcatNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newSymbolNodeWithPos(0, 1),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: false,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newSymbolNodeWithPos(0, 2),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newSymbolNodeWithPos(0, 1),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newAltNode(
+ newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ newRepeatNode(newSymbolNodeWithPos(0, 2)),
+ ),
+ nullable: true,
+ first: newSymbolPositionSet().add(1).add(2),
+ last: newSymbolPositionSet().add(1).add(2),
+ },
+ {
+ root: newRepeatNode(newSymbolNodeWithPos(0, 1)),
+ nullable: true,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ {
+ root: newOptionNode(newSymbolNodeWithPos(0, 1)),
+ nullable: true,
+ first: newSymbolPositionSet().add(1),
+ last: newSymbolPositionSet().add(1),
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ if tt.root.nullable() != tt.nullable {
+ t.Errorf("unexpected nullable attribute; want: %v, got: %v", tt.nullable, tt.root.nullable())
+ }
+ if tt.first.hash() != tt.root.first().hash() {
+ t.Errorf("unexpected first positions attribute; want: %v, got: %v", tt.first, tt.root.first())
+ }
+ if tt.last.hash() != tt.root.last().hash() {
+ t.Errorf("unexpected last positions attribute; want: %v, got: %v", tt.last, tt.root.last())
+ }
+ })
+ }
+}
+
+func newSymbolNodeWithPos(v byte, pos symbolPosition) *symbolNode {
+ n := newSymbolNode(v)
+ n.pos = pos
+ return n
+}
+
+func newEndMarkerNodeWithPos(id int, pos symbolPosition) *endMarkerNode {
+ n := newEndMarkerNode(spec.LexModeKindID(id))
+ n.pos = pos
+ return n
+}
+
+func TestFollowAndSymbolTable(t *testing.T) {
+ symPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, false)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ endPos := func(n uint16) symbolPosition {
+ pos, err := newSymbolPosition(n, true)
+ if err != nil {
+ panic(err)
+ }
+ return pos
+ }
+
+ p := parser.NewParser(spec.LexKindName("test"), strings.NewReader("(a|b)*abb"))
+ cpt, err := p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ bt, symTab, err := ConvertCPTreeToByteTree(map[spec.LexModeKindID]parser.CPTree{
+ spec.LexModeKindIDMin: cpt,
+ })
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ {
+ followTab := genFollowTable(bt)
+ if followTab == nil {
+ t.Fatal("follow table is nil")
+ }
+ expectedFollowTab := followTable{
+ 1: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 2: newSymbolPositionSet().add(symPos(1)).add(symPos(2)).add(symPos(3)),
+ 3: newSymbolPositionSet().add(symPos(4)),
+ 4: newSymbolPositionSet().add(symPos(5)),
+ 5: newSymbolPositionSet().add(endPos(6)),
+ }
+ testFollowTable(t, expectedFollowTab, followTab)
+ }
+
+ {
+ entry := func(v byte) byteRange {
+ return byteRange{
+ from: v,
+ to: v,
+ }
+ }
+
+ expectedSymTab := &symbolTable{
+ symPos2Byte: map[symbolPosition]byteRange{
+ symPos(1): entry(byte('a')),
+ symPos(2): entry(byte('b')),
+ symPos(3): entry(byte('a')),
+ symPos(4): entry(byte('b')),
+ symPos(5): entry(byte('b')),
+ },
+ endPos2ID: map[symbolPosition]spec.LexModeKindID{
+ endPos(6): 1,
+ },
+ }
+ testSymbolTable(t, expectedSymTab, symTab)
+ }
+}
+
+func testFollowTable(t *testing.T, expected, actual followTable) {
+ if len(actual) != len(expected) {
+ t.Errorf("unexpected number of the follow table entries; want: %v, got: %v", len(expected), len(actual))
+ }
+ for ePos, eSet := range expected {
+ aSet, ok := actual[ePos]
+ if !ok {
+ t.Fatalf("follow entry is not found: position: %v, follow: %v", ePos, eSet)
+ }
+ if aSet.hash() != eSet.hash() {
+ t.Fatalf("follow entry of position %v is mismatched: want: %v, got: %v", ePos, aSet, eSet)
+ }
+ }
+}
+
+func testSymbolTable(t *testing.T, expected, actual *symbolTable) {
+ t.Helper()
+
+ if len(actual.symPos2Byte) != len(expected.symPos2Byte) {
+ t.Errorf("unexpected symPos2Byte entries: want: %v entries, got: %v entries", len(expected.symPos2Byte), len(actual.symPos2Byte))
+ }
+ for ePos, eByte := range expected.symPos2Byte {
+ byte, ok := actual.symPos2Byte[ePos]
+ if !ok {
+ t.Errorf("a symbol position entry is not found: %v -> %v", ePos, eByte)
+ continue
+ }
+ if byte.from != eByte.from || byte.to != eByte.to {
+ t.Errorf("unexpected symbol position entry: want: %v -> %v, got: %v -> %v", ePos, eByte, ePos, byte)
+ }
+ }
+
+ if len(actual.endPos2ID) != len(expected.endPos2ID) {
+ t.Errorf("unexpected endPos2ID entries: want: %v entries, got: %v entries", len(expected.endPos2ID), len(actual.endPos2ID))
+ }
+ for ePos, eID := range expected.endPos2ID {
+ id, ok := actual.endPos2ID[ePos]
+ if !ok {
+ t.Errorf("an end position entry is not found: %v -> %v", ePos, eID)
+ continue
+ }
+ if id != eID {
+ t.Errorf("unexpected end position entry: want: %v -> %v, got: %v -> %v", ePos, eID, ePos, id)
+ }
+ }
+}
+
+
+func MainTest() {}