diff options
-rw-r--r-- | compressor/compressor.go | 214 | ||||
-rw-r--r-- | compressor/compressor_test.go | 122 | ||||
-rw-r--r-- | src/tre.go | 211 | ||||
-rw-r--r-- | tests/tre.go | 117 |
4 files changed, 328 insertions, 336 deletions
diff --git a/compressor/compressor.go b/compressor/compressor.go deleted file mode 100644 index cdfeacb..0000000 --- a/compressor/compressor.go +++ /dev/null @@ -1,214 +0,0 @@ -package compressor - -import ( - "encoding/binary" - "fmt" - "sort" -) - -type OriginalTable struct { - entries []int - rowCount int - colCount int -} - -func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) { - if len(entries) == 0 { - return nil, fmt.Errorf("enries is empty") - } - if colCount <= 0 { - return nil, fmt.Errorf("colCount must be >=1") - } - if len(entries)%colCount != 0 { - return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount) - } - - return &OriginalTable{ - entries: entries, - rowCount: len(entries) / colCount, - colCount: colCount, - }, nil -} - -type Compressor interface { - Compress(orig *OriginalTable) error - Lookup(row, col int) (int, error) - OriginalTableSize() (int, int) -} - -var ( - _ Compressor = &UniqueEntriesTable{} - _ Compressor = &RowDisplacementTable{} -) - -type UniqueEntriesTable struct { - UniqueEntries []int - RowNums []int - OriginalRowCount int - OriginalColCount int -} - -func NewUniqueEntriesTable() *UniqueEntriesTable { - return &UniqueEntriesTable{} -} - -func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) { - if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { - return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) - } - return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil -} - -func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) { - return tab.OriginalRowCount, tab.OriginalColCount -} - -func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error { - var uniqueEntries []int - rowNums := make([]int, orig.rowCount) - hash2RowNum := map[string]int{} - nextRowNum := 0 - for row := 0; row < orig.rowCount; row++ { - var rowHash string - { - buf := make([]byte, 0, orig.colCount*8) - for col := 0; col < orig.colCount; col++ { - b := make([]byte, 8) - binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col])) - buf = append(buf, b...) - } - rowHash = string(buf) - } - rowNum, ok := hash2RowNum[rowHash] - if !ok { - rowNum = nextRowNum - nextRowNum++ - hash2RowNum[rowHash] = rowNum - start := row * orig.colCount - entry := append([]int{}, orig.entries[start:start+orig.colCount]...) - uniqueEntries = append(uniqueEntries, entry...) - } - rowNums[row] = rowNum - } - - tab.UniqueEntries = uniqueEntries - tab.RowNums = rowNums - tab.OriginalRowCount = orig.rowCount - tab.OriginalColCount = orig.colCount - - return nil -} - -const ForbiddenValue = -1 - -type RowDisplacementTable struct { - OriginalRowCount int - OriginalColCount int - EmptyValue int - Entries []int - Bounds []int - RowDisplacement []int -} - -func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable { - return &RowDisplacementTable{ - EmptyValue: emptyValue, - } -} - -func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) { - if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { - return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) - } - d := tab.RowDisplacement[row] - if tab.Bounds[d+col] != row { - return tab.EmptyValue, nil - } - return tab.Entries[d+col], nil -} - -func (tab *RowDisplacementTable) OriginalTableSize() (int, int) { - return tab.OriginalRowCount, tab.OriginalColCount -} - -type rowInfo struct { - rowNum int - nonEmptyCount int - nonEmptyCol []int -} - -func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error { - rowInfo := make([]rowInfo, orig.rowCount) - { - row := 0 - col := 0 - rowInfo[0].rowNum = 0 - for _, v := range orig.entries { - if col == orig.colCount { - row++ - col = 0 - rowInfo[row].rowNum = row - } - if v != tab.EmptyValue { - rowInfo[row].nonEmptyCount++ - rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col) - } - col++ - } - - sort.SliceStable(rowInfo, func(i int, j int) bool { - return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount - }) - } - - origEntriesLen := len(orig.entries) - entries := make([]int, origEntriesLen) - bounds := make([]int, origEntriesLen) - resultBottom := orig.colCount - rowDisplacement := make([]int, orig.rowCount) - { - for i := 0; i < origEntriesLen; i++ { - entries[i] = tab.EmptyValue - bounds[i] = ForbiddenValue - } - - nextRowDisplacement := 0 - for _, rInfo := range rowInfo { - if rInfo.nonEmptyCount <= 0 { - continue - } - - for { - isOverlapped := false - for _, col := range rInfo.nonEmptyCol { - if entries[nextRowDisplacement+col] == tab.EmptyValue { - continue - } - nextRowDisplacement++ - isOverlapped = true - break - } - if isOverlapped { - continue - } - - rowDisplacement[rInfo.rowNum] = nextRowDisplacement - for _, col := range rInfo.nonEmptyCol { - entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col] - bounds[nextRowDisplacement+col] = rInfo.rowNum - } - resultBottom = nextRowDisplacement + orig.colCount - nextRowDisplacement++ - break - } - } - } - - tab.OriginalRowCount = orig.rowCount - tab.OriginalColCount = orig.colCount - tab.Entries = entries[:resultBottom] - tab.Bounds = bounds[:resultBottom] - tab.RowDisplacement = rowDisplacement - - return nil -} diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go deleted file mode 100644 index 621b731..0000000 --- a/compressor/compressor_test.go +++ /dev/null @@ -1,122 +0,0 @@ -package compressor - -import ( - "fmt" - "testing" -) - -func TestCompressor_Compress(t *testing.T) { - x := 0 // an empty value - - allCompressors := func() []Compressor { - return []Compressor{ - NewUniqueEntriesTable(), - NewRowDisplacementTable(x), - } - } - - tests := []struct { - original []int - rowCount int - colCount int - compressors []Compressor - }{ - { - original: []int{ - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - x, x, x, x, x, - x, x, x, x, x, - x, x, x, x, x, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - 1, 1, 1, 1, 1, - x, x, x, x, x, - 1, 1, 1, 1, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - { - original: []int{ - 1, x, 1, 1, 1, - 1, 1, x, 1, 1, - 1, 1, 1, x, 1, - }, - rowCount: 3, - colCount: 5, - compressors: allCompressors(), - }, - } - for i, tt := range tests { - for _, comp := range tt.compressors { - t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { - dup := make([]int, len(tt.original)) - copy(dup, tt.original) - - orig, err := NewOriginalTable(tt.original, tt.colCount) - if err != nil { - t.Fatal(err) - } - err = comp.Compress(orig) - if err != nil { - t.Fatal(err) - } - rowCount, colCount := comp.OriginalTableSize() - if rowCount != tt.rowCount || colCount != tt.colCount { - t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) - } - for i := 0; i < tt.rowCount; i++ { - for j := 0; j < tt.colCount; j++ { - v, err := comp.Lookup(i, j) - if err != nil { - t.Fatal(err) - } - expected := tt.original[i*tt.colCount+j] - if v != expected { - t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) - } - } - } - - // Calling with out-of-range indexes should be an error. - if _, err := comp.Lookup(0, -1); err == nil { - t.Fatalf("expected error didn't occur (0, -1)") - } - if _, err := comp.Lookup(-1, 0); err == nil { - t.Fatalf("expected error didn't occur (-1, 0)") - } - if _, err := comp.Lookup(rowCount-1, colCount); err == nil { - t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) - } - if _, err := comp.Lookup(rowCount, colCount-1); err == nil { - t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) - } - - // The compressor must not break the original table. - for i := 0; i < tt.rowCount; i++ { - for j := 0; j < tt.colCount; j++ { - idx := i*tt.colCount + j - if tt.original[idx] != dup[idx] { - t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) - } - } - } - }) - } - } -} @@ -1,8 +1,10 @@ package tre import ( + "encoding/binary" "fmt" "strings" + "sort" ) @@ -141,5 +143,214 @@ func splitCodePoint(from, to rune) ([]*cpRange, error) { +type OriginalTable struct { + entries []int + rowCount int + colCount int +} + +func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) { + if len(entries) == 0 { + return nil, fmt.Errorf("enries is empty") + } + if colCount <= 0 { + return nil, fmt.Errorf("colCount must be >=1") + } + if len(entries)%colCount != 0 { + return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount) + } + + return &OriginalTable{ + entries: entries, + rowCount: len(entries) / colCount, + colCount: colCount, + }, nil +} + +type Compressor interface { + Compress(orig *OriginalTable) error + Lookup(row, col int) (int, error) + OriginalTableSize() (int, int) +} + +var ( + _ Compressor = &UniqueEntriesTable{} + _ Compressor = &RowDisplacementTable{} +) + +type UniqueEntriesTable struct { + UniqueEntries []int + RowNums []int + OriginalRowCount int + OriginalColCount int +} + +func NewUniqueEntriesTable() *UniqueEntriesTable { + return &UniqueEntriesTable{} +} + +func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil +} + +func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error { + var uniqueEntries []int + rowNums := make([]int, orig.rowCount) + hash2RowNum := map[string]int{} + nextRowNum := 0 + for row := 0; row < orig.rowCount; row++ { + var rowHash string + { + buf := make([]byte, 0, orig.colCount*8) + for col := 0; col < orig.colCount; col++ { + b := make([]byte, 8) + binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col])) + buf = append(buf, b...) + } + rowHash = string(buf) + } + rowNum, ok := hash2RowNum[rowHash] + if !ok { + rowNum = nextRowNum + nextRowNum++ + hash2RowNum[rowHash] = rowNum + start := row * orig.colCount + entry := append([]int{}, orig.entries[start:start+orig.colCount]...) + uniqueEntries = append(uniqueEntries, entry...) + } + rowNums[row] = rowNum + } + + tab.UniqueEntries = uniqueEntries + tab.RowNums = rowNums + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + + return nil +} + +const ForbiddenValue = -1 + +type RowDisplacementTable struct { + OriginalRowCount int + OriginalColCount int + EmptyValue int + Entries []int + Bounds []int + RowDisplacement []int +} + +func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable { + return &RowDisplacementTable{ + EmptyValue: emptyValue, + } +} + +func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + d := tab.RowDisplacement[row] + if tab.Bounds[d+col] != row { + return tab.EmptyValue, nil + } + return tab.Entries[d+col], nil +} + +func (tab *RowDisplacementTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +type rowInfo struct { + rowNum int + nonEmptyCount int + nonEmptyCol []int +} + +func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error { + rowInfo := make([]rowInfo, orig.rowCount) + { + row := 0 + col := 0 + rowInfo[0].rowNum = 0 + for _, v := range orig.entries { + if col == orig.colCount { + row++ + col = 0 + rowInfo[row].rowNum = row + } + if v != tab.EmptyValue { + rowInfo[row].nonEmptyCount++ + rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col) + } + col++ + } + + sort.SliceStable(rowInfo, func(i int, j int) bool { + return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount + }) + } + + origEntriesLen := len(orig.entries) + entries := make([]int, origEntriesLen) + bounds := make([]int, origEntriesLen) + resultBottom := orig.colCount + rowDisplacement := make([]int, orig.rowCount) + { + for i := 0; i < origEntriesLen; i++ { + entries[i] = tab.EmptyValue + bounds[i] = ForbiddenValue + } + + nextRowDisplacement := 0 + for _, rInfo := range rowInfo { + if rInfo.nonEmptyCount <= 0 { + continue + } + + for { + isOverlapped := false + for _, col := range rInfo.nonEmptyCol { + if entries[nextRowDisplacement+col] == tab.EmptyValue { + continue + } + nextRowDisplacement++ + isOverlapped = true + break + } + if isOverlapped { + continue + } + + rowDisplacement[rInfo.rowNum] = nextRowDisplacement + for _, col := range rInfo.nonEmptyCol { + entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col] + bounds[nextRowDisplacement+col] = rInfo.rowNum + } + resultBottom = nextRowDisplacement + orig.colCount + nextRowDisplacement++ + break + } + } + } + + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + tab.Entries = entries[:resultBottom] + tab.Bounds = bounds[:resultBottom] + tab.RowDisplacement = rowDisplacement + + return nil +} + + + func Main() { } diff --git a/tests/tre.go b/tests/tre.go index 1f3cfed..9029259 100644 --- a/tests/tre.go +++ b/tests/tre.go @@ -240,12 +240,129 @@ func TestGenCharBlocksIllFormed(t *testing.T) { } } +func TestCompressor_Compress(t *testing.T) { + x := 0 // an empty value + + allCompressors := func() []Compressor { + return []Compressor{ + NewUniqueEntriesTable(), + NewRowDisplacementTable(x), + } + } + + tests := []struct { + original []int + rowCount int + colCount int + compressors []Compressor + }{ + { + original: []int{ + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + x, x, x, x, x, + x, x, x, x, x, + x, x, x, x, x, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, 1, 1, 1, 1, + x, x, x, x, x, + 1, 1, 1, 1, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + { + original: []int{ + 1, x, 1, 1, 1, + 1, 1, x, 1, 1, + 1, 1, 1, x, 1, + }, + rowCount: 3, + colCount: 5, + compressors: allCompressors(), + }, + } + for i, tt := range tests { + for _, comp := range tt.compressors { + t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) { + dup := make([]int, len(tt.original)) + copy(dup, tt.original) + + orig, err := NewOriginalTable(tt.original, tt.colCount) + if err != nil { + t.Fatal(err) + } + err = comp.Compress(orig) + if err != nil { + t.Fatal(err) + } + rowCount, colCount := comp.OriginalTableSize() + if rowCount != tt.rowCount || colCount != tt.colCount { + t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount) + } + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + v, err := comp.Lookup(i, j) + if err != nil { + t.Fatal(err) + } + expected := tt.original[i*tt.colCount+j] + if v != expected { + t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v) + } + } + } + + // Calling with out-of-range indexes should be an error. + if _, err := comp.Lookup(0, -1); err == nil { + t.Fatalf("expected error didn't occur (0, -1)") + } + if _, err := comp.Lookup(-1, 0); err == nil { + t.Fatalf("expected error didn't occur (-1, 0)") + } + if _, err := comp.Lookup(rowCount-1, colCount); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount) + } + if _, err := comp.Lookup(rowCount, colCount-1); err == nil { + t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1) + } + + // The compressor must not break the original table. + for i := 0; i < tt.rowCount; i++ { + for j := 0; j < tt.colCount; j++ { + idx := i*tt.colCount + j + if tt.original[idx] != dup[idx] { + t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx]) + } + } + } + }) + } + } +} + func MainTest() { tests := []testing.InternalTest{ { "TestGenCharBlocksWellFormed", TestGenCharBlocksWellFormed }, { "TestGenCharBlocksIllFormed", TestGenCharBlocksIllFormed }, + { "TestCompressor_Compress", TestCompressor_Compress }, } deps := testdeps.TestDeps{} |