diff options
author | EuAndreh <eu@euandre.org> | 2024-11-03 16:45:29 -0300 |
---|---|---|
committer | EuAndreh <eu@euandre.org> | 2024-11-29 10:06:17 -0300 |
commit | e049a52c2bdc0b68c0c6f1a04b4ca216c862b463 (patch) | |
tree | def612191cdf18580de3ab607fc81f08f26ec383 /compressor | |
parent | Absorb utf8/ code (diff) | |
download | tre-e049a52c2bdc0b68c0c6f1a04b4ca216c862b463.tar.gz tre-e049a52c2bdc0b68c0c6f1a04b4ca216c862b463.tar.xz |
Absorb compressor/ code
Diffstat (limited to 'compressor')
-rw-r--r-- | compressor/compressor.go | 214 | ||||
-rw-r--r-- | compressor/compressor_test.go | 122 |
2 files changed, 0 insertions, 336 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go deleted file mode 100644 index cdfeacb..0000000 --- a/compressor/compressor.go +++ /dev/null @@ -1,214 +0,0 @@ -package compressor - -import ( - "encoding/binary" - "fmt" - "sort" -) - -type OriginalTable struct { - entries []int - rowCount int - colCount int -} - -func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) { - if len(entries) == 0 { - return nil, fmt.Errorf("enries is empty") - } - if colCount <= 0 { - return nil, fmt.Errorf("colCount must be >=1") - } - if len(entries)%colCount != 0 { - return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount) - } - - return &OriginalTable{ - entries: entries, - rowCount: len(entries) / colCount, - colCount: colCount, - }, nil -} - -type Compressor interface { - Compress(orig *OriginalTable) error - Lookup(row, col int) (int, error) - OriginalTableSize() (int, int) -} - -var ( - _ Compressor = &UniqueEntriesTable{} - _ Compressor = &RowDisplacementTable{} -) - -type UniqueEntriesTable struct { - UniqueEntries []int - RowNums []int - OriginalRowCount int - OriginalColCount int -} - -func NewUniqueEntriesTable() *UniqueEntriesTable { - return &UniqueEntriesTable{} -} - -func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) { - if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { - return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) - } - return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil -} - -func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) { - return tab.OriginalRowCount, tab.OriginalColCount -} - -func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error { - var uniqueEntries []int - rowNums := make([]int, orig.rowCount) - hash2RowNum := map[string]int{} - nextRowNum := 0 - for row := 0; row < orig.rowCount; row++ { - var rowHash string - { - buf := make([]byte, 0, orig.colCount*8) - for col := 0; col < orig.colCount; col++ { - b := make([]byte, 8) - binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col])) - buf = append(buf, b...) - } - rowHash = string(buf) - } - rowNum, ok := hash2RowNum[rowHash] - if !ok { - rowNum = nextRowNum - nextRowNum++ - hash2RowNum[rowHash] = rowNum - start := row * orig.colCount - entry := append([]int{}, orig.entries[start:start+orig.colCount]...) - uniqueEntries = append(uniqueEntries, entry...) - } - rowNums[row] = rowNum - } - - tab.UniqueEntries = uniqueEntries - tab.RowNums = rowNums - tab.OriginalRowCount = orig.rowCount - tab.OriginalColCount = orig.colCount - - return nil -} - -const ForbiddenValue = -1 - -type RowDisplacementTable struct { - OriginalRowCount int - OriginalColCount int - EmptyValue int - Entries []int - Bounds []int - RowDisplacement []int -} - -func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable { - return &RowDisplacementTable{ - EmptyValue: emptyValue, - } -} - -func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) { - if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { - return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) - } - d := tab.RowDisplacement[row] - if tab.Bounds[d+col] != row { - return tab.EmptyValue, nil - } - return tab.Entries[d+col], nil -} - -func (tab *RowDisplacementTable) OriginalTableSize() (int, int) { - return tab.OriginalRowCount, tab.OriginalColCount -} - -type rowInfo struct { - rowNum int - nonEmptyCount int - nonEmptyCol []int -} - -func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error { - rowInfo := make([]rowInfo, orig.rowCount) - { - row := 0 - col := 0 - rowInfo[0].rowNum = 0 - for _, v := range orig.entries { - if col == orig.colCount { - row++ - col = 0 - rowInfo[row].rowNum = row - } - if v != tab.EmptyValue { - rowInfo[row].nonEmptyCount++ - rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col) - } - col++ - } - - sort.SliceStable(rowInfo, func(i int, j int) bool { - return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount - }) - } - - origEntriesLen := len(orig.entries) - entries := make([]int, origEntriesLen) - bounds := make([]int, origEntriesLen) - resultBottom := orig.colCount - rowDisplacement := make([]int, orig.rowCount) - { - for i := 0; i < origEntriesLen; i++ { - entries[i] = tab.EmptyValue - bounds[i] = ForbiddenValue - } - - nextRowDisplacement := 0 - for _, rInfo := range rowInfo { - if rInfo.nonEmptyCount <= 0 { - continue - } - - for { - isOverlapped := false - for _, col := range rInfo.nonEmptyCol { - if entries[nextRowDisplacement+col] == tab.EmptyValue { - continue - } - nextRowDisplacement++ - isOverlapped = true - break - } - if isOverlapped { - continue - } - - rowDisplacement[rInfo.rowNum] = nextRowDisplacement - for _, col := range rInfo.nonEmptyCol { - entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col] - bounds[nextRowDisplacement+col] = rInfo.rowNum - } - resultBottom = nextRowDisplacement + orig.colCount - nextRowDisplacement++ - break - } - } - } - - tab.OriginalRowCount = orig.rowCount - tab.OriginalColCount = orig.colCount - tab.Entries = entries[:resultBottom] - tab.Bounds = bounds[:resultBottom] - tab.RowDisplacement = rowDisplacement - - return nil -} diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go deleted file mode 100644 index 621b731..0000000 --- a/compressor/compressor_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package compressor - -import ( - "fmt" - "testing" -) - -func TestCompressor_Compress(t *testing.T) { - x := 0 // an empty value - - allCompressors := func() []Compressor { - return []Compressor{ - NewUniqueEntriesTable(), - NewRowDisplacementTable(x), - } - } - - tests := []struct { - original []int - rowCount int - colCount int - compressors []Compressor - }{ - { - original: []int{ - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - x, x, x, x, x, - x, x, x, x, x, - x, x, x, x, x, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - 1, 1, 1, 1, 1, - x, x, x, x, x, - 1, 1, 1, 1, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - 1, x, 1, 1, 1, - 1, 1, x, 1, 1, - 1, 1, 1, x, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - } - for i, tt := range tests { - for _, comp := range tt.compressors { - t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { - dup := make([]int, len(tt.original)) - copy(dup, tt.original) - - orig, err := NewOriginalTable(tt.original, tt.colCount) - if err != nil { - t.Fatal(err) - } - err = comp.Compress(orig) - if err != nil { - t.Fatal(err) - } - rowCount, colCount := comp.OriginalTableSize() - if rowCount != tt.rowCount || colCount != tt.colCount { - t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) - } - for i := 0; i < tt.rowCount; i++ { - for j := 0; j < tt.colCount; j++ { - v, err := comp.Lookup(i, j) - if err != nil { - t.Fatal(err) - } - expected := tt.original[i*tt.colCount+j] - if v != expected { - t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) - } - } - } - - // Calling with out-of-range indexes should be an error. - if _, err := comp.Lookup(0, -1); err == nil { - t.Fatalf("expected error didn't occur (0, -1)") - } - if _, err := comp.Lookup(-1, 0); err == nil { - t.Fatalf("expected error didn't occur (-1, 0)") - } - if _, err := comp.Lookup(rowCount-1, colCount); err == nil { - t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) - } - if _, err := comp.Lookup(rowCount, colCount-1); err == nil { - t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) - } - - // The compressor must not break the original table. - for i := 0; i < tt.rowCount; i++ { - for j := 0; j < tt.colCount; j++ { - idx := i*tt.colCount + j - if tt.original[idx] != dup[idx] { - t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) - } - } - } - }) - } - } -} |