aboutsummaryrefslogtreecommitdiff
path: root/compressor
diff options
context:
space:
mode:
Diffstat (limited to 'compressor')
-rw-r--r--compressor/compressor.go214
-rw-r--r--compressor/compressor_test.go122
2 files changed, 0 insertions, 336 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go
deleted file mode 100644
index cdfeacb..0000000
--- a/compressor/compressor.go
+++ /dev/null
@@ -1,214 +0,0 @@
-package compressor
-
-import (
- "encoding/binary"
- "fmt"
- "sort"
-)
-
-type OriginalTable struct {
- entries []int
- rowCount int
- colCount int
-}
-
-func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) {
- if len(entries) == 0 {
- return nil, fmt.Errorf("enries is empty")
- }
- if colCount <= 0 {
- return nil, fmt.Errorf("colCount must be >=1")
- }
- if len(entries)%colCount != 0 {
- return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount)
- }
-
- return &OriginalTable{
- entries: entries,
- rowCount: len(entries) / colCount,
- colCount: colCount,
- }, nil
-}
-
-type Compressor interface {
- Compress(orig *OriginalTable) error
- Lookup(row, col int) (int, error)
- OriginalTableSize() (int, int)
-}
-
-var (
- _ Compressor = &UniqueEntriesTable{}
- _ Compressor = &RowDisplacementTable{}
-)
-
-type UniqueEntriesTable struct {
- UniqueEntries []int
- RowNums []int
- OriginalRowCount int
- OriginalColCount int
-}
-
-func NewUniqueEntriesTable() *UniqueEntriesTable {
- return &UniqueEntriesTable{}
-}
-
-func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
- if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
- return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
- }
- return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
-}
-
-func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
- return tab.OriginalRowCount, tab.OriginalColCount
-}
-
-func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
- var uniqueEntries []int
- rowNums := make([]int, orig.rowCount)
- hash2RowNum := map[string]int{}
- nextRowNum := 0
- for row := 0; row < orig.rowCount; row++ {
- var rowHash string
- {
- buf := make([]byte, 0, orig.colCount*8)
- for col := 0; col < orig.colCount; col++ {
- b := make([]byte, 8)
- binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col]))
- buf = append(buf, b...)
- }
- rowHash = string(buf)
- }
- rowNum, ok := hash2RowNum[rowHash]
- if !ok {
- rowNum = nextRowNum
- nextRowNum++
- hash2RowNum[rowHash] = rowNum
- start := row * orig.colCount
- entry := append([]int{}, orig.entries[start:start+orig.colCount]...)
- uniqueEntries = append(uniqueEntries, entry...)
- }
- rowNums[row] = rowNum
- }
-
- tab.UniqueEntries = uniqueEntries
- tab.RowNums = rowNums
- tab.OriginalRowCount = orig.rowCount
- tab.OriginalColCount = orig.colCount
-
- return nil
-}
-
-const ForbiddenValue = -1
-
-type RowDisplacementTable struct {
- OriginalRowCount int
- OriginalColCount int
- EmptyValue int
- Entries []int
- Bounds []int
- RowDisplacement []int
-}
-
-func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
- return &RowDisplacementTable{
- EmptyValue: emptyValue,
- }
-}
-
-func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
- if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
- return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
- }
- d := tab.RowDisplacement[row]
- if tab.Bounds[d+col] != row {
- return tab.EmptyValue, nil
- }
- return tab.Entries[d+col], nil
-}
-
-func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
- return tab.OriginalRowCount, tab.OriginalColCount
-}
-
-type rowInfo struct {
- rowNum int
- nonEmptyCount int
- nonEmptyCol []int
-}
-
-func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
- rowInfo := make([]rowInfo, orig.rowCount)
- {
- row := 0
- col := 0
- rowInfo[0].rowNum = 0
- for _, v := range orig.entries {
- if col == orig.colCount {
- row++
- col = 0
- rowInfo[row].rowNum = row
- }
- if v != tab.EmptyValue {
- rowInfo[row].nonEmptyCount++
- rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col)
- }
- col++
- }
-
- sort.SliceStable(rowInfo, func(i int, j int) bool {
- return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount
- })
- }
-
- origEntriesLen := len(orig.entries)
- entries := make([]int, origEntriesLen)
- bounds := make([]int, origEntriesLen)
- resultBottom := orig.colCount
- rowDisplacement := make([]int, orig.rowCount)
- {
- for i := 0; i < origEntriesLen; i++ {
- entries[i] = tab.EmptyValue
- bounds[i] = ForbiddenValue
- }
-
- nextRowDisplacement := 0
- for _, rInfo := range rowInfo {
- if rInfo.nonEmptyCount <= 0 {
- continue
- }
-
- for {
- isOverlapped := false
- for _, col := range rInfo.nonEmptyCol {
- if entries[nextRowDisplacement+col] == tab.EmptyValue {
- continue
- }
- nextRowDisplacement++
- isOverlapped = true
- break
- }
- if isOverlapped {
- continue
- }
-
- rowDisplacement[rInfo.rowNum] = nextRowDisplacement
- for _, col := range rInfo.nonEmptyCol {
- entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col]
- bounds[nextRowDisplacement+col] = rInfo.rowNum
- }
- resultBottom = nextRowDisplacement + orig.colCount
- nextRowDisplacement++
- break
- }
- }
- }
-
- tab.OriginalRowCount = orig.rowCount
- tab.OriginalColCount = orig.colCount
- tab.Entries = entries[:resultBottom]
- tab.Bounds = bounds[:resultBottom]
- tab.RowDisplacement = rowDisplacement
-
- return nil
-}
diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go
deleted file mode 100644
index 621b731..0000000
--- a/compressor/compressor_test.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package compressor
-
-import (
- "fmt"
- "testing"
-)
-
-func TestCompressor_Compress(t *testing.T) {
- x := 0 // an empty value
-
- allCompressors := func() []Compressor {
- return []Compressor{
- NewUniqueEntriesTable(),
- NewRowDisplacementTable(x),
- }
- }
-
- tests := []struct {
- original []int
- rowCount int
- colCount int
- compressors []Compressor
- }{
- {
- original: []int{
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- {
- original: []int{
- x, x, x, x, x,
- x, x, x, x, x,
- x, x, x, x, x,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- {
- original: []int{
- 1, 1, 1, 1, 1,
- x, x, x, x, x,
- 1, 1, 1, 1, 1,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- {
- original: []int{
- 1, x, 1, 1, 1,
- 1, 1, x, 1, 1,
- 1, 1, 1, x, 1,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- }
- for i, tt := range tests {
- for _, comp := range tt.compressors {
- t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
- dup := make([]int, len(tt.original))
- copy(dup, tt.original)
-
- orig, err := NewOriginalTable(tt.original, tt.colCount)
- if err != nil {
- t.Fatal(err)
- }
- err = comp.Compress(orig)
- if err != nil {
- t.Fatal(err)
- }
- rowCount, colCount := comp.OriginalTableSize()
- if rowCount != tt.rowCount || colCount != tt.colCount {
- t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
- }
- for i := 0; i < tt.rowCount; i++ {
- for j := 0; j < tt.colCount; j++ {
- v, err := comp.Lookup(i, j)
- if err != nil {
- t.Fatal(err)
- }
- expected := tt.original[i*tt.colCount+j]
- if v != expected {
- t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
- }
- }
- }
-
- // Calling with out-of-range indexes should be an error.
- if _, err := comp.Lookup(0, -1); err == nil {
- t.Fatalf("expected error didn't occur (0, -1)")
- }
- if _, err := comp.Lookup(-1, 0); err == nil {
- t.Fatalf("expected error didn't occur (-1, 0)")
- }
- if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
- t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
- }
- if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
- t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
- }
-
- // The compressor must not break the original table.
- for i := 0; i < tt.rowCount; i++ {
- for j := 0; j < tt.colCount; j++ {
- idx := i*tt.colCount + j
- if tt.original[idx] != dup[idx] {
- t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
- }
- }
- }
- })
- }
- }
-}