aboutsummaryrefslogtreecommitdiff
path: root/compressor/compressor.go
diff options
context:
space:
mode:
Diffstat (limited to 'compressor/compressor.go')
-rw-r--r--compressor/compressor.go214
1 files changed, 0 insertions, 214 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go
deleted file mode 100644
index cdfeacb..0000000
--- a/compressor/compressor.go
+++ /dev/null
@@ -1,214 +0,0 @@
-package compressor
-
-import (
- "encoding/binary"
- "fmt"
- "sort"
-)
-
-type OriginalTable struct {
- entries []int
- rowCount int
- colCount int
-}
-
-func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) {
- if len(entries) == 0 {
- return nil, fmt.Errorf("enries is empty")
- }
- if colCount <= 0 {
- return nil, fmt.Errorf("colCount must be >=1")
- }
- if len(entries)%colCount != 0 {
- return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount)
- }
-
- return &OriginalTable{
- entries: entries,
- rowCount: len(entries) / colCount,
- colCount: colCount,
- }, nil
-}
-
-type Compressor interface {
- Compress(orig *OriginalTable) error
- Lookup(row, col int) (int, error)
- OriginalTableSize() (int, int)
-}
-
-var (
- _ Compressor = &UniqueEntriesTable{}
- _ Compressor = &RowDisplacementTable{}
-)
-
-type UniqueEntriesTable struct {
- UniqueEntries []int
- RowNums []int
- OriginalRowCount int
- OriginalColCount int
-}
-
-func NewUniqueEntriesTable() *UniqueEntriesTable {
- return &UniqueEntriesTable{}
-}
-
-func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
- if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
- return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
- }
- return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
-}
-
-func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
- return tab.OriginalRowCount, tab.OriginalColCount
-}
-
-func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
- var uniqueEntries []int
- rowNums := make([]int, orig.rowCount)
- hash2RowNum := map[string]int{}
- nextRowNum := 0
- for row := 0; row < orig.rowCount; row++ {
- var rowHash string
- {
- buf := make([]byte, 0, orig.colCount*8)
- for col := 0; col < orig.colCount; col++ {
- b := make([]byte, 8)
- binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col]))
- buf = append(buf, b...)
- }
- rowHash = string(buf)
- }
- rowNum, ok := hash2RowNum[rowHash]
- if !ok {
- rowNum = nextRowNum
- nextRowNum++
- hash2RowNum[rowHash] = rowNum
- start := row * orig.colCount
- entry := append([]int{}, orig.entries[start:start+orig.colCount]...)
- uniqueEntries = append(uniqueEntries, entry...)
- }
- rowNums[row] = rowNum
- }
-
- tab.UniqueEntries = uniqueEntries
- tab.RowNums = rowNums
- tab.OriginalRowCount = orig.rowCount
- tab.OriginalColCount = orig.colCount
-
- return nil
-}
-
-const ForbiddenValue = -1
-
-type RowDisplacementTable struct {
- OriginalRowCount int
- OriginalColCount int
- EmptyValue int
- Entries []int
- Bounds []int
- RowDisplacement []int
-}
-
-func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
- return &RowDisplacementTable{
- EmptyValue: emptyValue,
- }
-}
-
-func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
- if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
- return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
- }
- d := tab.RowDisplacement[row]
- if tab.Bounds[d+col] != row {
- return tab.EmptyValue, nil
- }
- return tab.Entries[d+col], nil
-}
-
-func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
- return tab.OriginalRowCount, tab.OriginalColCount
-}
-
-type rowInfo struct {
- rowNum int
- nonEmptyCount int
- nonEmptyCol []int
-}
-
-func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
- rowInfo := make([]rowInfo, orig.rowCount)
- {
- row := 0
- col := 0
- rowInfo[0].rowNum = 0
- for _, v := range orig.entries {
- if col == orig.colCount {
- row++
- col = 0
- rowInfo[row].rowNum = row
- }
- if v != tab.EmptyValue {
- rowInfo[row].nonEmptyCount++
- rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col)
- }
- col++
- }
-
- sort.SliceStable(rowInfo, func(i int, j int) bool {
- return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount
- })
- }
-
- origEntriesLen := len(orig.entries)
- entries := make([]int, origEntriesLen)
- bounds := make([]int, origEntriesLen)
- resultBottom := orig.colCount
- rowDisplacement := make([]int, orig.rowCount)
- {
- for i := 0; i < origEntriesLen; i++ {
- entries[i] = tab.EmptyValue
- bounds[i] = ForbiddenValue
- }
-
- nextRowDisplacement := 0
- for _, rInfo := range rowInfo {
- if rInfo.nonEmptyCount <= 0 {
- continue
- }
-
- for {
- isOverlapped := false
- for _, col := range rInfo.nonEmptyCol {
- if entries[nextRowDisplacement+col] == tab.EmptyValue {
- continue
- }
- nextRowDisplacement++
- isOverlapped = true
- break
- }
- if isOverlapped {
- continue
- }
-
- rowDisplacement[rInfo.rowNum] = nextRowDisplacement
- for _, col := range rInfo.nonEmptyCol {
- entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col]
- bounds[nextRowDisplacement+col] = rInfo.rowNum
- }
- resultBottom = nextRowDisplacement + orig.colCount
- nextRowDisplacement++
- break
- }
- }
- }
-
- tab.OriginalRowCount = orig.rowCount
- tab.OriginalColCount = orig.colCount
- tab.Entries = entries[:resultBottom]
- tab.Bounds = bounds[:resultBottom]
- tab.RowDisplacement = rowDisplacement
-
- return nil
-}