diff options
Diffstat (limited to 'src/tre.go')
-rw-r--r-- | src/tre.go | 211 |
1 files changed, 211 insertions, 0 deletions
@@ -1,8 +1,10 @@ package tre import ( + "encoding/binary" "fmt" "strings" + "sort" ) @@ -141,5 +143,214 @@ func splitCodePoint(from, to rune) ([]*cpRange, error) { +type OriginalTable struct { + entries []int + rowCount int + colCount int +} + +func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) { + if len(entries) == 0 { + return nil, fmt.Errorf("enries is empty") + } + if colCount <= 0 { + return nil, fmt.Errorf("colCount must be >=1") + } + if len(entries)%colCount != 0 { + return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount) + } + + return &OriginalTable{ + entries: entries, + rowCount: len(entries) / colCount, + colCount: colCount, + }, nil +} + +type Compressor interface { + Compress(orig *OriginalTable) error + Lookup(row, col int) (int, error) + OriginalTableSize() (int, int) +} + +var ( + _ Compressor = &UniqueEntriesTable{} + _ Compressor = &RowDisplacementTable{} +) + +type UniqueEntriesTable struct { + UniqueEntries []int + RowNums []int + OriginalRowCount int + OriginalColCount int +} + +func NewUniqueEntriesTable() *UniqueEntriesTable { + return &UniqueEntriesTable{} +} + +func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil +} + +func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error { + var uniqueEntries []int + rowNums := make([]int, orig.rowCount) + hash2RowNum := map[string]int{} + nextRowNum := 0 + for row := 0; row < orig.rowCount; row++ { + var rowHash string + { + buf := make([]byte, 0, orig.colCount*8) + for col := 0; col < orig.colCount; col++ { + b := make([]byte, 8) + binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col])) + buf = append(buf, b...) + } + rowHash = string(buf) + } + rowNum, ok := hash2RowNum[rowHash] + if !ok { + rowNum = nextRowNum + nextRowNum++ + hash2RowNum[rowHash] = rowNum + start := row * orig.colCount + entry := append([]int{}, orig.entries[start:start+orig.colCount]...) + uniqueEntries = append(uniqueEntries, entry...) + } + rowNums[row] = rowNum + } + + tab.UniqueEntries = uniqueEntries + tab.RowNums = rowNums + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + + return nil +} + +const ForbiddenValue = -1 + +type RowDisplacementTable struct { + OriginalRowCount int + OriginalColCount int + EmptyValue int + Entries []int + Bounds []int + RowDisplacement []int +} + +func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable { + return &RowDisplacementTable{ + EmptyValue: emptyValue, + } +} + +func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) { + if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount { + return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col) + } + d := tab.RowDisplacement[row] + if tab.Bounds[d+col] != row { + return tab.EmptyValue, nil + } + return tab.Entries[d+col], nil +} + +func (tab *RowDisplacementTable) OriginalTableSize() (int, int) { + return tab.OriginalRowCount, tab.OriginalColCount +} + +type rowInfo struct { + rowNum int + nonEmptyCount int + nonEmptyCol []int +} + +func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error { + rowInfo := make([]rowInfo, orig.rowCount) + { + row := 0 + col := 0 + rowInfo[0].rowNum = 0 + for _, v := range orig.entries { + if col == orig.colCount { + row++ + col = 0 + rowInfo[row].rowNum = row + } + if v != tab.EmptyValue { + rowInfo[row].nonEmptyCount++ + rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col) + } + col++ + } + + sort.SliceStable(rowInfo, func(i int, j int) bool { + return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount + }) + } + + origEntriesLen := len(orig.entries) + entries := make([]int, origEntriesLen) + bounds := make([]int, origEntriesLen) + resultBottom := orig.colCount + rowDisplacement := make([]int, orig.rowCount) + { + for i := 0; i < origEntriesLen; i++ { + entries[i] = tab.EmptyValue + bounds[i] = ForbiddenValue + } + + nextRowDisplacement := 0 + for _, rInfo := range rowInfo { + if rInfo.nonEmptyCount <= 0 { + continue + } + + for { + isOverlapped := false + for _, col := range rInfo.nonEmptyCol { + if entries[nextRowDisplacement+col] == tab.EmptyValue { + continue + } + nextRowDisplacement++ + isOverlapped = true + break + } + if isOverlapped { + continue + } + + rowDisplacement[rInfo.rowNum] = nextRowDisplacement + for _, col := range rInfo.nonEmptyCol { + entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col] + bounds[nextRowDisplacement+col] = rInfo.rowNum + } + resultBottom = nextRowDisplacement + orig.colCount + nextRowDisplacement++ + break + } + } + } + + tab.OriginalRowCount = orig.rowCount + tab.OriginalColCount = orig.colCount + tab.Entries = entries[:resultBottom] + tab.Bounds = bounds[:resultBottom] + tab.RowDisplacement = rowDisplacement + + return nil +} + + + func Main() { } |