aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compressor/compressor.go214
-rw-r--r--compressor/compressor_test.go122
-rw-r--r--src/tre.go211
-rw-r--r--tests/tre.go117
4 files changed, 328 insertions, 336 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go
deleted file mode 100644
index cdfeacb..0000000
--- a/compressor/compressor.go
+++ /dev/null
@@ -1,214 +0,0 @@
-package compressor
-
-import (
- "encoding/binary"
- "fmt"
- "sort"
-)
-
-type OriginalTable struct {
- entries []int
- rowCount int
- colCount int
-}
-
-func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) {
- if len(entries) == 0 {
- return nil, fmt.Errorf("enries is empty")
- }
- if colCount <= 0 {
- return nil, fmt.Errorf("colCount must be >=1")
- }
- if len(entries)%colCount != 0 {
- return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount)
- }
-
- return &OriginalTable{
- entries: entries,
- rowCount: len(entries) / colCount,
- colCount: colCount,
- }, nil
-}
-
-type Compressor interface {
- Compress(orig *OriginalTable) error
- Lookup(row, col int) (int, error)
- OriginalTableSize() (int, int)
-}
-
-var (
- _ Compressor = &UniqueEntriesTable{}
- _ Compressor = &RowDisplacementTable{}
-)
-
-type UniqueEntriesTable struct {
- UniqueEntries []int
- RowNums []int
- OriginalRowCount int
- OriginalColCount int
-}
-
-func NewUniqueEntriesTable() *UniqueEntriesTable {
- return &UniqueEntriesTable{}
-}
-
-func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
- if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
- return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
- }
- return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
-}
-
-func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
- return tab.OriginalRowCount, tab.OriginalColCount
-}
-
-func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
- var uniqueEntries []int
- rowNums := make([]int, orig.rowCount)
- hash2RowNum := map[string]int{}
- nextRowNum := 0
- for row := 0; row < orig.rowCount; row++ {
- var rowHash string
- {
- buf := make([]byte, 0, orig.colCount*8)
- for col := 0; col < orig.colCount; col++ {
- b := make([]byte, 8)
- binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col]))
- buf = append(buf, b...)
- }
- rowHash = string(buf)
- }
- rowNum, ok := hash2RowNum[rowHash]
- if !ok {
- rowNum = nextRowNum
- nextRowNum++
- hash2RowNum[rowHash] = rowNum
- start := row * orig.colCount
- entry := append([]int{}, orig.entries[start:start+orig.colCount]...)
- uniqueEntries = append(uniqueEntries, entry...)
- }
- rowNums[row] = rowNum
- }
-
- tab.UniqueEntries = uniqueEntries
- tab.RowNums = rowNums
- tab.OriginalRowCount = orig.rowCount
- tab.OriginalColCount = orig.colCount
-
- return nil
-}
-
-const ForbiddenValue = -1
-
-type RowDisplacementTable struct {
- OriginalRowCount int
- OriginalColCount int
- EmptyValue int
- Entries []int
- Bounds []int
- RowDisplacement []int
-}
-
-func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
- return &RowDisplacementTable{
- EmptyValue: emptyValue,
- }
-}
-
-func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
- if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
- return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
- }
- d := tab.RowDisplacement[row]
- if tab.Bounds[d+col] != row {
- return tab.EmptyValue, nil
- }
- return tab.Entries[d+col], nil
-}
-
-func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
- return tab.OriginalRowCount, tab.OriginalColCount
-}
-
-type rowInfo struct {
- rowNum int
- nonEmptyCount int
- nonEmptyCol []int
-}
-
-func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
- rowInfo := make([]rowInfo, orig.rowCount)
- {
- row := 0
- col := 0
- rowInfo[0].rowNum = 0
- for _, v := range orig.entries {
- if col == orig.colCount {
- row++
- col = 0
- rowInfo[row].rowNum = row
- }
- if v != tab.EmptyValue {
- rowInfo[row].nonEmptyCount++
- rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col)
- }
- col++
- }
-
- sort.SliceStable(rowInfo, func(i int, j int) bool {
- return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount
- })
- }
-
- origEntriesLen := len(orig.entries)
- entries := make([]int, origEntriesLen)
- bounds := make([]int, origEntriesLen)
- resultBottom := orig.colCount
- rowDisplacement := make([]int, orig.rowCount)
- {
- for i := 0; i < origEntriesLen; i++ {
- entries[i] = tab.EmptyValue
- bounds[i] = ForbiddenValue
- }
-
- nextRowDisplacement := 0
- for _, rInfo := range rowInfo {
- if rInfo.nonEmptyCount <= 0 {
- continue
- }
-
- for {
- isOverlapped := false
- for _, col := range rInfo.nonEmptyCol {
- if entries[nextRowDisplacement+col] == tab.EmptyValue {
- continue
- }
- nextRowDisplacement++
- isOverlapped = true
- break
- }
- if isOverlapped {
- continue
- }
-
- rowDisplacement[rInfo.rowNum] = nextRowDisplacement
- for _, col := range rInfo.nonEmptyCol {
- entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col]
- bounds[nextRowDisplacement+col] = rInfo.rowNum
- }
- resultBottom = nextRowDisplacement + orig.colCount
- nextRowDisplacement++
- break
- }
- }
- }
-
- tab.OriginalRowCount = orig.rowCount
- tab.OriginalColCount = orig.colCount
- tab.Entries = entries[:resultBottom]
- tab.Bounds = bounds[:resultBottom]
- tab.RowDisplacement = rowDisplacement
-
- return nil
-}
diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go
deleted file mode 100644
index 621b731..0000000
--- a/compressor/compressor_test.go
+++ /dev/null
@@ -1,122 +0,0 @@
-package compressor
-
-import (
- "fmt"
- "testing"
-)
-
-func TestCompressor_Compress(t *testing.T) {
- x := 0 // an empty value
-
- allCompressors := func() []Compressor {
- return []Compressor{
- NewUniqueEntriesTable(),
- NewRowDisplacementTable(x),
- }
- }
-
- tests := []struct {
- original []int
- rowCount int
- colCount int
- compressors []Compressor
- }{
- {
- original: []int{
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- {
- original: []int{
- x, x, x, x, x,
- x, x, x, x, x,
- x, x, x, x, x,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- {
- original: []int{
- 1, 1, 1, 1, 1,
- x, x, x, x, x,
- 1, 1, 1, 1, 1,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- {
- original: []int{
- 1, x, 1, 1, 1,
- 1, 1, x, 1, 1,
- 1, 1, 1, x, 1,
- },
- rowCount: 3,
- colCount: 5,
- compressors: allCompressors(),
- },
- }
- for i, tt := range tests {
- for _, comp := range tt.compressors {
- t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
- dup := make([]int, len(tt.original))
- copy(dup, tt.original)
-
- orig, err := NewOriginalTable(tt.original, tt.colCount)
- if err != nil {
- t.Fatal(err)
- }
- err = comp.Compress(orig)
- if err != nil {
- t.Fatal(err)
- }
- rowCount, colCount := comp.OriginalTableSize()
- if rowCount != tt.rowCount || colCount != tt.colCount {
- t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
- }
- for i := 0; i < tt.rowCount; i++ {
- for j := 0; j < tt.colCount; j++ {
- v, err := comp.Lookup(i, j)
- if err != nil {
- t.Fatal(err)
- }
- expected := tt.original[i*tt.colCount+j]
- if v != expected {
- t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
- }
- }
- }
-
- // Calling with out-of-range indexes should be an error.
- if _, err := comp.Lookup(0, -1); err == nil {
- t.Fatalf("expected error didn't occur (0, -1)")
- }
- if _, err := comp.Lookup(-1, 0); err == nil {
- t.Fatalf("expected error didn't occur (-1, 0)")
- }
- if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
- t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
- }
- if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
- t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
- }
-
- // The compressor must not break the original table.
- for i := 0; i < tt.rowCount; i++ {
- for j := 0; j < tt.colCount; j++ {
- idx := i*tt.colCount + j
- if tt.original[idx] != dup[idx] {
- t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
- }
- }
- }
- })
- }
- }
-}
diff --git a/src/tre.go b/src/tre.go
index 204019a..cab7784 100644
--- a/src/tre.go
+++ b/src/tre.go
@@ -1,8 +1,10 @@
package tre
import (
+ "encoding/binary"
"fmt"
"strings"
+ "sort"
)
@@ -141,5 +143,214 @@ func splitCodePoint(from, to rune) ([]*cpRange, error) {
+type OriginalTable struct {
+ entries []int
+ rowCount int
+ colCount int
+}
+
+func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) {
+ if len(entries) == 0 {
+ return nil, fmt.Errorf("enries is empty")
+ }
+ if colCount <= 0 {
+ return nil, fmt.Errorf("colCount must be >=1")
+ }
+ if len(entries)%colCount != 0 {
+ return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount)
+ }
+
+ return &OriginalTable{
+ entries: entries,
+ rowCount: len(entries) / colCount,
+ colCount: colCount,
+ }, nil
+}
+
+type Compressor interface {
+ Compress(orig *OriginalTable) error
+ Lookup(row, col int) (int, error)
+ OriginalTableSize() (int, int)
+}
+
+var (
+ _ Compressor = &UniqueEntriesTable{}
+ _ Compressor = &RowDisplacementTable{}
+)
+
+type UniqueEntriesTable struct {
+ UniqueEntries []int
+ RowNums []int
+ OriginalRowCount int
+ OriginalColCount int
+}
+
+func NewUniqueEntriesTable() *UniqueEntriesTable {
+ return &UniqueEntriesTable{}
+}
+
+func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
+ if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
+ return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
+ }
+ return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
+}
+
+func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
+ return tab.OriginalRowCount, tab.OriginalColCount
+}
+
+func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
+ var uniqueEntries []int
+ rowNums := make([]int, orig.rowCount)
+ hash2RowNum := map[string]int{}
+ nextRowNum := 0
+ for row := 0; row < orig.rowCount; row++ {
+ var rowHash string
+ {
+ buf := make([]byte, 0, orig.colCount*8)
+ for col := 0; col < orig.colCount; col++ {
+ b := make([]byte, 8)
+ binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col]))
+ buf = append(buf, b...)
+ }
+ rowHash = string(buf)
+ }
+ rowNum, ok := hash2RowNum[rowHash]
+ if !ok {
+ rowNum = nextRowNum
+ nextRowNum++
+ hash2RowNum[rowHash] = rowNum
+ start := row * orig.colCount
+ entry := append([]int{}, orig.entries[start:start+orig.colCount]...)
+ uniqueEntries = append(uniqueEntries, entry...)
+ }
+ rowNums[row] = rowNum
+ }
+
+ tab.UniqueEntries = uniqueEntries
+ tab.RowNums = rowNums
+ tab.OriginalRowCount = orig.rowCount
+ tab.OriginalColCount = orig.colCount
+
+ return nil
+}
+
+const ForbiddenValue = -1
+
+type RowDisplacementTable struct {
+ OriginalRowCount int
+ OriginalColCount int
+ EmptyValue int
+ Entries []int
+ Bounds []int
+ RowDisplacement []int
+}
+
+func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
+ return &RowDisplacementTable{
+ EmptyValue: emptyValue,
+ }
+}
+
+func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
+ if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
+ return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
+ }
+ d := tab.RowDisplacement[row]
+ if tab.Bounds[d+col] != row {
+ return tab.EmptyValue, nil
+ }
+ return tab.Entries[d+col], nil
+}
+
+func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
+ return tab.OriginalRowCount, tab.OriginalColCount
+}
+
+type rowInfo struct {
+ rowNum int
+ nonEmptyCount int
+ nonEmptyCol []int
+}
+
+func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
+ rowInfo := make([]rowInfo, orig.rowCount)
+ {
+ row := 0
+ col := 0
+ rowInfo[0].rowNum = 0
+ for _, v := range orig.entries {
+ if col == orig.colCount {
+ row++
+ col = 0
+ rowInfo[row].rowNum = row
+ }
+ if v != tab.EmptyValue {
+ rowInfo[row].nonEmptyCount++
+ rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col)
+ }
+ col++
+ }
+
+ sort.SliceStable(rowInfo, func(i int, j int) bool {
+ return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount
+ })
+ }
+
+ origEntriesLen := len(orig.entries)
+ entries := make([]int, origEntriesLen)
+ bounds := make([]int, origEntriesLen)
+ resultBottom := orig.colCount
+ rowDisplacement := make([]int, orig.rowCount)
+ {
+ for i := 0; i < origEntriesLen; i++ {
+ entries[i] = tab.EmptyValue
+ bounds[i] = ForbiddenValue
+ }
+
+ nextRowDisplacement := 0
+ for _, rInfo := range rowInfo {
+ if rInfo.nonEmptyCount <= 0 {
+ continue
+ }
+
+ for {
+ isOverlapped := false
+ for _, col := range rInfo.nonEmptyCol {
+ if entries[nextRowDisplacement+col] == tab.EmptyValue {
+ continue
+ }
+ nextRowDisplacement++
+ isOverlapped = true
+ break
+ }
+ if isOverlapped {
+ continue
+ }
+
+ rowDisplacement[rInfo.rowNum] = nextRowDisplacement
+ for _, col := range rInfo.nonEmptyCol {
+ entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col]
+ bounds[nextRowDisplacement+col] = rInfo.rowNum
+ }
+ resultBottom = nextRowDisplacement + orig.colCount
+ nextRowDisplacement++
+ break
+ }
+ }
+ }
+
+ tab.OriginalRowCount = orig.rowCount
+ tab.OriginalColCount = orig.colCount
+ tab.Entries = entries[:resultBottom]
+ tab.Bounds = bounds[:resultBottom]
+ tab.RowDisplacement = rowDisplacement
+
+ return nil
+}
+
+
+
func Main() {
}
diff --git a/tests/tre.go b/tests/tre.go
index 1f3cfed..9029259 100644
--- a/tests/tre.go
+++ b/tests/tre.go
@@ -240,12 +240,129 @@ func TestGenCharBlocksIllFormed(t *testing.T) {
}
}
+func TestCompressor_Compress(t *testing.T) {
+ x := 0 // an empty value
+
+ allCompressors := func() []Compressor {
+ return []Compressor{
+ NewUniqueEntriesTable(),
+ NewRowDisplacementTable(x),
+ }
+ }
+
+ tests := []struct {
+ original []int
+ rowCount int
+ colCount int
+ compressors []Compressor
+ }{
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ x, x, x, x, x,
+ x, x, x, x, x,
+ x, x, x, x, x,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ x, x, x, x, x,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, x, 1, 1, 1,
+ 1, 1, x, 1, 1,
+ 1, 1, 1, x, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ }
+ for i, tt := range tests {
+ for _, comp := range tt.compressors {
+ t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
+ dup := make([]int, len(tt.original))
+ copy(dup, tt.original)
+
+ orig, err := NewOriginalTable(tt.original, tt.colCount)
+ if err != nil {
+ t.Fatal(err)
+ }
+ err = comp.Compress(orig)
+ if err != nil {
+ t.Fatal(err)
+ }
+ rowCount, colCount := comp.OriginalTableSize()
+ if rowCount != tt.rowCount || colCount != tt.colCount {
+ t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
+ }
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ v, err := comp.Lookup(i, j)
+ if err != nil {
+ t.Fatal(err)
+ }
+ expected := tt.original[i*tt.colCount+j]
+ if v != expected {
+ t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
+ }
+ }
+ }
+
+ // Calling with out-of-range indexes should be an error.
+ if _, err := comp.Lookup(0, -1); err == nil {
+ t.Fatalf("expected error didn't occur (0, -1)")
+ }
+ if _, err := comp.Lookup(-1, 0); err == nil {
+ t.Fatalf("expected error didn't occur (-1, 0)")
+ }
+ if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
+ }
+ if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
+ }
+
+ // The compressor must not break the original table.
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ idx := i*tt.colCount + j
+ if tt.original[idx] != dup[idx] {
+ t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
+ }
+ }
+ }
+ })
+ }
+ }
+}
+
func MainTest() {
tests := []testing.InternalTest{
{ "TestGenCharBlocksWellFormed", TestGenCharBlocksWellFormed },
{ "TestGenCharBlocksIllFormed", TestGenCharBlocksIllFormed },
+ { "TestCompressor_Compress", TestCompressor_Compress },
}
deps := testdeps.TestDeps{}