aboutsummaryrefslogtreecommitdiff
path: root/compressor
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-05-07 21:57:47 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-05-07 21:57:47 +0900
commitff10e6c495101bc0a73de66d1f28f180f3b562a7 (patch)
treee92904c540855e52f5dce3ccd53fb9a586657b04 /compressor
parentRemove Peek* functions (diff)
downloadtre-ff10e6c495101bc0a73de66d1f28f180f3b562a7.tar.gz
tre-ff10e6c495101bc0a73de66d1f28f180f3b562a7.tar.xz
Add transition table compressor
Diffstat (limited to 'compressor')
-rw-r--r--compressor/compressor.go214
-rw-r--r--compressor/compressor_test.go122
2 files changed, 336 insertions, 0 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go
new file mode 100644
index 0000000..cdfeacb
--- /dev/null
+++ b/compressor/compressor.go
@@ -0,0 +1,214 @@
+package compressor
+
+import (
+ "encoding/binary"
+ "fmt"
+ "sort"
+)
+
+type OriginalTable struct {
+ entries []int
+ rowCount int
+ colCount int
+}
+
+func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) {
+ if len(entries) == 0 {
+ return nil, fmt.Errorf("enries is empty")
+ }
+ if colCount <= 0 {
+ return nil, fmt.Errorf("colCount must be >=1")
+ }
+ if len(entries)%colCount != 0 {
+ return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount)
+ }
+
+ return &OriginalTable{
+ entries: entries,
+ rowCount: len(entries) / colCount,
+ colCount: colCount,
+ }, nil
+}
+
+type Compressor interface {
+ Compress(orig *OriginalTable) error
+ Lookup(row, col int) (int, error)
+ OriginalTableSize() (int, int)
+}
+
+var (
+ _ Compressor = &UniqueEntriesTable{}
+ _ Compressor = &RowDisplacementTable{}
+)
+
+type UniqueEntriesTable struct {
+ UniqueEntries []int
+ RowNums []int
+ OriginalRowCount int
+ OriginalColCount int
+}
+
+func NewUniqueEntriesTable() *UniqueEntriesTable {
+ return &UniqueEntriesTable{}
+}
+
+func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
+ if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
+ return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
+ }
+ return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
+}
+
+func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
+ return tab.OriginalRowCount, tab.OriginalColCount
+}
+
+func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
+ var uniqueEntries []int
+ rowNums := make([]int, orig.rowCount)
+ hash2RowNum := map[string]int{}
+ nextRowNum := 0
+ for row := 0; row < orig.rowCount; row++ {
+ var rowHash string
+ {
+ buf := make([]byte, 0, orig.colCount*8)
+ for col := 0; col < orig.colCount; col++ {
+ b := make([]byte, 8)
+ binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col]))
+ buf = append(buf, b...)
+ }
+ rowHash = string(buf)
+ }
+ rowNum, ok := hash2RowNum[rowHash]
+ if !ok {
+ rowNum = nextRowNum
+ nextRowNum++
+ hash2RowNum[rowHash] = rowNum
+ start := row * orig.colCount
+ entry := append([]int{}, orig.entries[start:start+orig.colCount]...)
+ uniqueEntries = append(uniqueEntries, entry...)
+ }
+ rowNums[row] = rowNum
+ }
+
+ tab.UniqueEntries = uniqueEntries
+ tab.RowNums = rowNums
+ tab.OriginalRowCount = orig.rowCount
+ tab.OriginalColCount = orig.colCount
+
+ return nil
+}
+
+const ForbiddenValue = -1
+
+type RowDisplacementTable struct {
+ OriginalRowCount int
+ OriginalColCount int
+ EmptyValue int
+ Entries []int
+ Bounds []int
+ RowDisplacement []int
+}
+
+func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
+ return &RowDisplacementTable{
+ EmptyValue: emptyValue,
+ }
+}
+
+func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
+ if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
+ return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
+ }
+ d := tab.RowDisplacement[row]
+ if tab.Bounds[d+col] != row {
+ return tab.EmptyValue, nil
+ }
+ return tab.Entries[d+col], nil
+}
+
+func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
+ return tab.OriginalRowCount, tab.OriginalColCount
+}
+
+type rowInfo struct {
+ rowNum int
+ nonEmptyCount int
+ nonEmptyCol []int
+}
+
+func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
+ rowInfo := make([]rowInfo, orig.rowCount)
+ {
+ row := 0
+ col := 0
+ rowInfo[0].rowNum = 0
+ for _, v := range orig.entries {
+ if col == orig.colCount {
+ row++
+ col = 0
+ rowInfo[row].rowNum = row
+ }
+ if v != tab.EmptyValue {
+ rowInfo[row].nonEmptyCount++
+ rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col)
+ }
+ col++
+ }
+
+ sort.SliceStable(rowInfo, func(i int, j int) bool {
+ return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount
+ })
+ }
+
+ origEntriesLen := len(orig.entries)
+ entries := make([]int, origEntriesLen)
+ bounds := make([]int, origEntriesLen)
+ resultBottom := orig.colCount
+ rowDisplacement := make([]int, orig.rowCount)
+ {
+ for i := 0; i < origEntriesLen; i++ {
+ entries[i] = tab.EmptyValue
+ bounds[i] = ForbiddenValue
+ }
+
+ nextRowDisplacement := 0
+ for _, rInfo := range rowInfo {
+ if rInfo.nonEmptyCount <= 0 {
+ continue
+ }
+
+ for {
+ isOverlapped := false
+ for _, col := range rInfo.nonEmptyCol {
+ if entries[nextRowDisplacement+col] == tab.EmptyValue {
+ continue
+ }
+ nextRowDisplacement++
+ isOverlapped = true
+ break
+ }
+ if isOverlapped {
+ continue
+ }
+
+ rowDisplacement[rInfo.rowNum] = nextRowDisplacement
+ for _, col := range rInfo.nonEmptyCol {
+ entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col]
+ bounds[nextRowDisplacement+col] = rInfo.rowNum
+ }
+ resultBottom = nextRowDisplacement + orig.colCount
+ nextRowDisplacement++
+ break
+ }
+ }
+ }
+
+ tab.OriginalRowCount = orig.rowCount
+ tab.OriginalColCount = orig.colCount
+ tab.Entries = entries[:resultBottom]
+ tab.Bounds = bounds[:resultBottom]
+ tab.RowDisplacement = rowDisplacement
+
+ return nil
+}
diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go
new file mode 100644
index 0000000..621b731
--- /dev/null
+++ b/compressor/compressor_test.go
@@ -0,0 +1,122 @@
+package compressor
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestCompressor_Compress(t *testing.T) {
+ x := 0 // an empty value
+
+ allCompressors := func() []Compressor {
+ return []Compressor{
+ NewUniqueEntriesTable(),
+ NewRowDisplacementTable(x),
+ }
+ }
+
+ tests := []struct {
+ original []int
+ rowCount int
+ colCount int
+ compressors []Compressor
+ }{
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ x, x, x, x, x,
+ x, x, x, x, x,
+ x, x, x, x, x,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ x, x, x, x, x,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, x, 1, 1, 1,
+ 1, 1, x, 1, 1,
+ 1, 1, 1, x, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ }
+ for i, tt := range tests {
+ for _, comp := range tt.compressors {
+ t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
+ dup := make([]int, len(tt.original))
+ copy(dup, tt.original)
+
+ orig, err := NewOriginalTable(tt.original, tt.colCount)
+ if err != nil {
+ t.Fatal(err)
+ }
+ err = comp.Compress(orig)
+ if err != nil {
+ t.Fatal(err)
+ }
+ rowCount, colCount := comp.OriginalTableSize()
+ if rowCount != tt.rowCount || colCount != tt.colCount {
+ t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
+ }
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ v, err := comp.Lookup(i, j)
+ if err != nil {
+ t.Fatal(err)
+ }
+ expected := tt.original[i*tt.colCount+j]
+ if v != expected {
+ t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
+ }
+ }
+ }
+
+ // Calling with out-of-range indexes should be an error.
+ if _, err := comp.Lookup(0, -1); err == nil {
+ t.Fatalf("expected error didn't occur (0, -1)")
+ }
+ if _, err := comp.Lookup(-1, 0); err == nil {
+ t.Fatalf("expected error didn't occur (-1, 0)")
+ }
+ if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
+ }
+ if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
+ }
+
+ // The compressor must not break the original table.
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ idx := i*tt.colCount + j
+ if tt.original[idx] != dup[idx] {
+ t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
+ }
+ }
+ }
+ })
+ }
+ }
+}