diff options
Diffstat (limited to 'compressor')
-rw-r--r-- | compressor/compressor.go | 214 | ||||
-rw-r--r-- | compressor/compressor_test.go | 122 |
2 files changed, 336 insertions, 0 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go new file mode 100644 index 0000000..cdfeacb --- /dev/null +++ b/compressor/compressor.go @@ -0,0 +1,214 @@ +package compressor + +import ( + "encoding/binary" + "fmt" + "sort" +) + +type OriginalTable struct { + entries []int + rowCount int + colCount int +} + +func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) { + if len(entries) == 0 { + return nil, fmt.Errorf("enries is empty") + } + if colCount <= 0 { + return nil, fmt.Errorf("colCount must be >=1") + } + if len(entries)%colCount != 0 { + return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount) + } + + return &OriginalTable{ + entries: entries, + rowCount: len(entries) / colCount, + colCount: colCount, + }, nil +} + +type Compressor interface { + Compress(orig *OriginalTable) error + Lookup(row, col int) (int, error) + OriginalTableSize() (int, int) +} + +var ( + _ Compressor = &UniqueEntriesTable{} + _ Compressor = &RowDisplacementTable{} +) + +type UniqueEntriesTable struct { + UniqueEntries []int + RowNums []int + OriginalRowCount int + OriginalColCount int +} + +func NewUniqueEntriesTable() *UniqueEntriesTable { + return &UniqueEntriesTable{} +} + +func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil +} + +func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error { + var uniqueEntries []int + rowNums := make([]int, orig.rowCount) + hash2RowNum := map[string]int{} + nextRowNum := 0 + for row := 0; row < orig.rowCount; row++ { + var rowHash string + { + buf := make([]byte, 0, orig.colCount*8) + for col := 0; col < orig.colCount; col++ { + b := make([]byte, 8) + binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col])) + buf = append(buf, b...) + } + rowHash = string(buf) + } + rowNum, ok := hash2RowNum[rowHash] + if !ok { + rowNum = nextRowNum + nextRowNum++ + hash2RowNum[rowHash] = rowNum + start := row * orig.colCount + entry := append([]int{}, orig.entries[start:start+orig.colCount]...) + uniqueEntries = append(uniqueEntries, entry...) + } + rowNums[row] = rowNum + } + + tab.UniqueEntries = uniqueEntries + tab.RowNums = rowNums + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + + return nil +} + +const ForbiddenValue = -1 + +type RowDisplacementTable struct { + OriginalRowCount int + OriginalColCount int + EmptyValue int + Entries []int + Bounds []int + RowDisplacement []int +} + +func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable { + return &RowDisplacementTable{ + EmptyValue: emptyValue, + } +} + +func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + d := tab.RowDisplacement[row] + if tab.Bounds[d+col] != row { + return tab.EmptyValue, nil + } + return tab.Entries[d+col], nil +} + +func (tab *RowDisplacementTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +type rowInfo struct { + rowNum int + nonEmptyCount int + nonEmptyCol []int +} + +func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error { + rowInfo := make([]rowInfo, orig.rowCount) + { + row := 0 + col := 0 + rowInfo[0].rowNum = 0 + for _, v := range orig.entries { + if col == orig.colCount { + row++ + col = 0 + rowInfo[row].rowNum = row + } + if v != tab.EmptyValue { + rowInfo[row].nonEmptyCount++ + rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col) + } + col++ + } + + sort.SliceStable(rowInfo, func(i int, j int) bool { + return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount + }) + } + + origEntriesLen := len(orig.entries) + entries := make([]int, origEntriesLen) + bounds := make([]int, origEntriesLen) + resultBottom := orig.colCount + rowDisplacement := make([]int, orig.rowCount) + { + for i := 0; i < origEntriesLen; i++ { + entries[i] = tab.EmptyValue + bounds[i] = ForbiddenValue + } + + nextRowDisplacement := 0 + for _, rInfo := range rowInfo { + if rInfo.nonEmptyCount <= 0 { + continue + } + + for { + isOverlapped := false + for _, col := range rInfo.nonEmptyCol { + if entries[nextRowDisplacement+col] == tab.EmptyValue { + continue + } + nextRowDisplacement++ + isOverlapped = true + break + } + if isOverlapped { + continue + } + + rowDisplacement[rInfo.rowNum] = nextRowDisplacement + for _, col := range rInfo.nonEmptyCol { + entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col] + bounds[nextRowDisplacement+col] = rInfo.rowNum + } + resultBottom = nextRowDisplacement + orig.colCount + nextRowDisplacement++ + break + } + } + } + + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + tab.Entries = entries[:resultBottom] + tab.Bounds = bounds[:resultBottom] + tab.RowDisplacement = rowDisplacement + + return nil +} diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go new file mode 100644 index 0000000..621b731 --- /dev/null +++ b/compressor/compressor_test.go @@ -0,0 +1,122 @@ +package compressor + +import ( + "fmt" + "testing" +) + +func TestCompressor_Compress(t *testing.T) { + x := 0 // an empty value + + allCompressors := func() []Compressor { + return []Compressor{ + NewUniqueEntriesTable(), + NewRowDisplacementTable(x), + } + } + + tests := []struct { + original []int + rowCount int + colCount int + compressors []Compressor + }{ + { + original: []int{ + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + x, x, x, x, x, + x, x, x, x, x, + x, x, x, x, x, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, 1, 1, 1, 1, + x, x, x, x, x, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, x, 1, 1, 1, + 1, 1, x, 1, 1, + 1, 1, 1, x, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + } + for i, tt := range tests { + for _, comp := range tt.compressors { + t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { + dup := make([]int, len(tt.original)) + copy(dup, tt.original) + + orig, err := NewOriginalTable(tt.original, tt.colCount) + if err != nil { + t.Fatal(err) + } + err = comp.Compress(orig) + if err != nil { + t.Fatal(err) + } + rowCount, colCount := comp.OriginalTableSize() + if rowCount != tt.rowCount || colCount != tt.colCount { + t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) + } + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + v, err := comp.Lookup(i, j) + if err != nil { + t.Fatal(err) + } + expected := tt.original[i*tt.colCount+j] + if v != expected { + t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) + } + } + } + + // Calling with out-of-range indexes should be an error. + if _, err := comp.Lookup(0, -1); err == nil { + t.Fatalf("expected error didn't occur (0, -1)") + } + if _, err := comp.Lookup(-1, 0); err == nil { + t.Fatalf("expected error didn't occur (-1, 0)") + } + if _, err := comp.Lookup(rowCount-1, colCount); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) + } + if _, err := comp.Lookup(rowCount, colCount-1); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) + } + + // The compressor must not break the original table. + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + idx := i*tt.colCount + j + if tt.original[idx] != dup[idx] { + t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) + } + } + } + }) + } + } +} |