diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-05-07 21:57:47 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-05-07 21:57:47 +0900 |
commit | ff10e6c495101bc0a73de66d1f28f180f3b562a7 (patch) | |
tree | e92904c540855e52f5dce3ccd53fb9a586657b04 | |
parent | Remove Peek* functions (diff) | |
download | tre-ff10e6c495101bc0a73de66d1f28f180f3b562a7.tar.gz tre-ff10e6c495101bc0a73de66d1f28f180f3b562a7.tar.xz |
Add transition table compressor
-rw-r--r-- | compiler/compiler.go | 53 | ||||
-rw-r--r-- | compiler/dfa.go | 16 | ||||
-rw-r--r-- | compressor/compressor.go | 214 | ||||
-rw-r--r-- | compressor/compressor_test.go | 122 | ||||
-rw-r--r-- | driver/lexer.go | 18 | ||||
-rw-r--r-- | spec/spec.go | 26 |
6 files changed, 431 insertions, 18 deletions
diff --git a/compiler/compiler.go b/compiler/compiler.go index 6f878c5..ffe0511 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -5,6 +5,7 @@ import ( "io" "strings" + "github.com/nihei9/maleeni/compressor" "github.com/nihei9/maleeni/log" "github.com/nihei9/maleeni/spec" ) @@ -160,14 +161,19 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod } config.logger.Log(`DFA: - States: %v states - Initial State: %v`, len(tranTab.Transition), tranTab.InitialState) + States: %v states (%v entries) + Initial State: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialState) config.logger.Log(" Accepting States:") for state, symbol := range tranTab.AcceptingStates { config.logger.Log(" %v: %v", state, symbol) } } + tranTab, err := compressTransitionTable(tranTab) + if err != nil { + return nil, err + } + return &spec.CompiledLexModeSpec{ Kinds: kinds, Push: push, @@ -175,3 +181,46 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod DFA: tranTab, }, nil } + +func compressTransitionTable(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { + ueTab := compressor.NewUniqueEntriesTable() + { + orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount) + if err != nil { + return nil, err + } + err = ueTab.Compress(orig) + if err != nil { + return nil, err + } + } + + rdTab := compressor.NewRowDisplacementTable(0) + { + orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount) + if err != nil { + return nil, err + } + err = rdTab.Compress(orig) + if err != nil { + return nil, err + } + } + + tranTab.Transition = &spec.UniqueEntriesTable{ + UniqueEntries: &spec.RowDisplacementTable{ + OriginalRowCount: rdTab.OriginalRowCount, + OriginalColCount: rdTab.OriginalColCount, + EmptyValue: rdTab.EmptyValue, + Entries: rdTab.Entries, + Bounds: rdTab.Bounds, + RowDisplacement: rdTab.RowDisplacement, + }, + RowNums: ueTab.RowNums, + OriginalRowCount: ueTab.OriginalRowCount, + OriginalColCount: ueTab.OriginalColCount, + } + tranTab.UncompressedTransition = nil + + return tranTab, nil +} diff --git a/compiler/dfa.go b/compiler/dfa.go index 049ff3e..de2c451 100644 --- a/compiler/dfa.go +++ b/compiler/dfa.go @@ -112,18 +112,20 @@ func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) { acc[state2Num[s]] = id } - tran := make([][]int, len(dfa.States)+1) + rowCount := len(dfa.States) + 1 + colCount := 256 + tran := make([]int, rowCount*colCount) for s, tab := range dfa.TransitionTable { - entry := make([]int, 256) for v, to := range tab { - entry[v] = state2Num[to] + tran[state2Num[s]*256+v] = state2Num[to] } - tran[state2Num[s]] = entry } return &spec.TransitionTable{ - InitialState: state2Num[dfa.InitialState], - AcceptingStates: acc, - Transition: tran, + InitialState: state2Num[dfa.InitialState], + AcceptingStates: acc, + UncompressedTransition: tran, + RowCount: rowCount, + ColCount: colCount, }, nil } diff --git a/compressor/compressor.go b/compressor/compressor.go new file mode 100644 index 0000000..cdfeacb --- /dev/null +++ b/compressor/compressor.go @@ -0,0 +1,214 @@ +package compressor + +import ( + "encoding/binary" + "fmt" + "sort" +) + +type OriginalTable struct { + entries []int + rowCount int + colCount int +} + +func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) { + if len(entries) == 0 { + return nil, fmt.Errorf("enries is empty") + } + if colCount <= 0 { + return nil, fmt.Errorf("colCount must be >=1") + } + if len(entries)%colCount != 0 { + return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount) + } + + return &OriginalTable{ + entries: entries, + rowCount: len(entries) / colCount, + colCount: colCount, + }, nil +} + +type Compressor interface { + Compress(orig *OriginalTable) error + Lookup(row, col int) (int, error) + OriginalTableSize() (int, int) +} + +var ( + _ Compressor = &UniqueEntriesTable{} + _ Compressor = &RowDisplacementTable{} +) + +type UniqueEntriesTable struct { + UniqueEntries []int + RowNums []int + OriginalRowCount int + OriginalColCount int +} + +func NewUniqueEntriesTable() *UniqueEntriesTable { + return &UniqueEntriesTable{} +} + +func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil +} + +func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error { + var uniqueEntries []int + rowNums := make([]int, orig.rowCount) + hash2RowNum := map[string]int{} + nextRowNum := 0 + for row := 0; row < orig.rowCount; row++ { + var rowHash string + { + buf := make([]byte, 0, orig.colCount*8) + for col := 0; col < orig.colCount; col++ { + b := make([]byte, 8) + binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col])) + buf = append(buf, b...) + } + rowHash = string(buf) + } + rowNum, ok := hash2RowNum[rowHash] + if !ok { + rowNum = nextRowNum + nextRowNum++ + hash2RowNum[rowHash] = rowNum + start := row * orig.colCount + entry := append([]int{}, orig.entries[start:start+orig.colCount]...) + uniqueEntries = append(uniqueEntries, entry...) + } + rowNums[row] = rowNum + } + + tab.UniqueEntries = uniqueEntries + tab.RowNums = rowNums + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + + return nil +} + +const ForbiddenValue = -1 + +type RowDisplacementTable struct { + OriginalRowCount int + OriginalColCount int + EmptyValue int + Entries []int + Bounds []int + RowDisplacement []int +} + +func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable { + return &RowDisplacementTable{ + EmptyValue: emptyValue, + } +} + +func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + d := tab.RowDisplacement[row] + if tab.Bounds[d+col] != row { + return tab.EmptyValue, nil + } + return tab.Entries[d+col], nil +} + +func (tab *RowDisplacementTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +type rowInfo struct { + rowNum int + nonEmptyCount int + nonEmptyCol []int +} + +func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error { + rowInfo := make([]rowInfo, orig.rowCount) + { + row := 0 + col := 0 + rowInfo[0].rowNum = 0 + for _, v := range orig.entries { + if col == orig.colCount { + row++ + col = 0 + rowInfo[row].rowNum = row + } + if v != tab.EmptyValue { + rowInfo[row].nonEmptyCount++ + rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col) + } + col++ + } + + sort.SliceStable(rowInfo, func(i int, j int) bool { + return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount + }) + } + + origEntriesLen := len(orig.entries) + entries := make([]int, origEntriesLen) + bounds := make([]int, origEntriesLen) + resultBottom := orig.colCount + rowDisplacement := make([]int, orig.rowCount) + { + for i := 0; i < origEntriesLen; i++ { + entries[i] = tab.EmptyValue + bounds[i] = ForbiddenValue + } + + nextRowDisplacement := 0 + for _, rInfo := range rowInfo { + if rInfo.nonEmptyCount <= 0 { + continue + } + + for { + isOverlapped := false + for _, col := range rInfo.nonEmptyCol { + if entries[nextRowDisplacement+col] == tab.EmptyValue { + continue + } + nextRowDisplacement++ + isOverlapped = true + break + } + if isOverlapped { + continue + } + + rowDisplacement[rInfo.rowNum] = nextRowDisplacement + for _, col := range rInfo.nonEmptyCol { + entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col] + bounds[nextRowDisplacement+col] = rInfo.rowNum + } + resultBottom = nextRowDisplacement + orig.colCount + nextRowDisplacement++ + break + } + } + } + + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + tab.Entries = entries[:resultBottom] + tab.Bounds = bounds[:resultBottom] + tab.RowDisplacement = rowDisplacement + + return nil +} diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go new file mode 100644 index 0000000..621b731 --- /dev/null +++ b/compressor/compressor_test.go @@ -0,0 +1,122 @@ +package compressor + +import ( + "fmt" + "testing" +) + +func TestCompressor_Compress(t *testing.T) { + x := 0 // an empty value + + allCompressors := func() []Compressor { + return []Compressor{ + NewUniqueEntriesTable(), + NewRowDisplacementTable(x), + } + } + + tests := []struct { + original []int + rowCount int + colCount int + compressors []Compressor + }{ + { + original: []int{ + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + x, x, x, x, x, + x, x, x, x, x, + x, x, x, x, x, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, 1, 1, 1, 1, + x, x, x, x, x, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, x, 1, 1, 1, + 1, 1, x, 1, 1, + 1, 1, 1, x, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + } + for i, tt := range tests { + for _, comp := range tt.compressors { + t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { + dup := make([]int, len(tt.original)) + copy(dup, tt.original) + + orig, err := NewOriginalTable(tt.original, tt.colCount) + if err != nil { + t.Fatal(err) + } + err = comp.Compress(orig) + if err != nil { + t.Fatal(err) + } + rowCount, colCount := comp.OriginalTableSize() + if rowCount != tt.rowCount || colCount != tt.colCount { + t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) + } + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + v, err := comp.Lookup(i, j) + if err != nil { + t.Fatal(err) + } + expected := tt.original[i*tt.colCount+j] + if v != expected { + t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) + } + } + } + + // Calling with out-of-range indexes should be an error. + if _, err := comp.Lookup(0, -1); err == nil { + t.Fatalf("expected error didn't occur (0, -1)") + } + if _, err := comp.Lookup(-1, 0); err == nil { + t.Fatalf("expected error didn't occur (-1, 0)") + } + if _, err := comp.Lookup(rowCount-1, colCount); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) + } + if _, err := comp.Lookup(rowCount, colCount-1); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) + } + + // The compressor must not break the original table. + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + idx := i*tt.colCount + j + if tt.original[idx] != dup[idx] { + t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) + } + } + } + }) + } + } +} diff --git a/driver/lexer.go b/driver/lexer.go index 486ff7d..485bfd9 100644 --- a/driver/lexer.go +++ b/driver/lexer.go @@ -255,12 +255,8 @@ func (l *lexer) next() (*Token, error) { } buf = append(buf, v) unfixedBufLen++ - entry := spec.DFA.Transition[state] - if len(entry) == 0 { - return nil, fmt.Errorf("no transition entry; state: %v", state) - } - nextState := entry[v] - if nextState == 0 { + nextState, ok := l.lookupNextState(mode, state, int(v)) + if !ok { if tok != nil { l.unread(unfixedBufLen) return tok, nil @@ -276,6 +272,16 @@ func (l *lexer) next() (*Token, error) { } } +func (l *lexer) lookupNextState(mode spec.LexModeNum, state int, v int) (int, bool) { + tab := l.clspec.Specs[mode].DFA.Transition + rowNum := tab.RowNums[state] + d := tab.UniqueEntries.RowDisplacement[rowNum] + if tab.UniqueEntries.Bounds[d+v] != rowNum { + return tab.UniqueEntries.EmptyValue, false + } + return tab.UniqueEntries.Entries[d+v], true +} + func (l *lexer) mode() spec.LexModeNum { return l.modeStack[len(l.modeStack)-1] } diff --git a/spec/spec.go b/spec/spec.go index e2291e9..1dba132 100644 --- a/spec/spec.go +++ b/spec/spec.go @@ -152,10 +152,30 @@ func (s *LexSpec) Validate() error { return nil } +type RowDisplacementTable struct { + OriginalRowCount int `json:"original_row_count"` + OriginalColCount int `json:"original_col_count"` + EmptyValue int `json:"empty_value"` + Entries []int `json:"entries"` + Bounds []int `json:"bounds"` + RowDisplacement []int `json:"row_displacement"` +} + +type UniqueEntriesTable struct { + UniqueEntries *RowDisplacementTable `json:"unique_entries"` + RowNums []int `json:"row_nums"` + OriginalRowCount int `json:"original_row_count"` + OriginalColCount int `json:"original_col_count"` + EmptyValue int `json:"empty_value"` +} + type TransitionTable struct { - InitialState int `json:"initial_state"` - AcceptingStates map[int]int `json:"accepting_states"` - Transition [][]int `json:"transition"` + InitialState int `json:"initial_state"` + AcceptingStates map[int]int `json:"accepting_states"` + RowCount int `json:"row_count"` + ColCount int `json:"col_count"` + Transition *UniqueEntriesTable `json:"transition,omitempty"` + UncompressedTransition []int `json:"uncompressed_transition,omitempty"` } type CompiledLexModeSpec struct { |