aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/compiler.go53
-rw-r--r--compiler/dfa.go16
-rw-r--r--compressor/compressor.go214
-rw-r--r--compressor/compressor_test.go122
-rw-r--r--driver/lexer.go18
-rw-r--r--spec/spec.go26
6 files changed, 431 insertions, 18 deletions
diff --git a/compiler/compiler.go b/compiler/compiler.go
index 6f878c5..ffe0511 100644
--- a/compiler/compiler.go
+++ b/compiler/compiler.go
@@ -5,6 +5,7 @@ import (
"io"
"strings"
+ "github.com/nihei9/maleeni/compressor"
"github.com/nihei9/maleeni/log"
"github.com/nihei9/maleeni/spec"
)
@@ -160,14 +161,19 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
}
config.logger.Log(`DFA:
- States: %v states
- Initial State: %v`, len(tranTab.Transition), tranTab.InitialState)
+ States: %v states (%v entries)
+ Initial State: %v`, tranTab.RowCount, tranTab.RowCount*tranTab.ColCount, tranTab.InitialState)
config.logger.Log(" Accepting States:")
for state, symbol := range tranTab.AcceptingStates {
config.logger.Log(" %v: %v", state, symbol)
}
}
+ tranTab, err := compressTransitionTable(tranTab)
+ if err != nil {
+ return nil, err
+ }
+
return &spec.CompiledLexModeSpec{
Kinds: kinds,
Push: push,
@@ -175,3 +181,46 @@ func compile(entries []*spec.LexEntry, modeNums map[spec.LexModeName]spec.LexMod
DFA: tranTab,
}, nil
}
+
+func compressTransitionTable(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
+ ueTab := compressor.NewUniqueEntriesTable()
+ {
+ orig, err := compressor.NewOriginalTable(tranTab.UncompressedTransition, tranTab.ColCount)
+ if err != nil {
+ return nil, err
+ }
+ err = ueTab.Compress(orig)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ rdTab := compressor.NewRowDisplacementTable(0)
+ {
+ orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount)
+ if err != nil {
+ return nil, err
+ }
+ err = rdTab.Compress(orig)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ tranTab.Transition = &spec.UniqueEntriesTable{
+ UniqueEntries: &spec.RowDisplacementTable{
+ OriginalRowCount: rdTab.OriginalRowCount,
+ OriginalColCount: rdTab.OriginalColCount,
+ EmptyValue: rdTab.EmptyValue,
+ Entries: rdTab.Entries,
+ Bounds: rdTab.Bounds,
+ RowDisplacement: rdTab.RowDisplacement,
+ },
+ RowNums: ueTab.RowNums,
+ OriginalRowCount: ueTab.OriginalRowCount,
+ OriginalColCount: ueTab.OriginalColCount,
+ }
+ tranTab.UncompressedTransition = nil
+
+ return tranTab, nil
+}
diff --git a/compiler/dfa.go b/compiler/dfa.go
index 049ff3e..de2c451 100644
--- a/compiler/dfa.go
+++ b/compiler/dfa.go
@@ -112,18 +112,20 @@ func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) {
acc[state2Num[s]] = id
}
- tran := make([][]int, len(dfa.States)+1)
+ rowCount := len(dfa.States) + 1
+ colCount := 256
+ tran := make([]int, rowCount*colCount)
for s, tab := range dfa.TransitionTable {
- entry := make([]int, 256)
for v, to := range tab {
- entry[v] = state2Num[to]
+ tran[state2Num[s]*256+v] = state2Num[to]
}
- tran[state2Num[s]] = entry
}
return &spec.TransitionTable{
- InitialState: state2Num[dfa.InitialState],
- AcceptingStates: acc,
- Transition: tran,
+ InitialState: state2Num[dfa.InitialState],
+ AcceptingStates: acc,
+ UncompressedTransition: tran,
+ RowCount: rowCount,
+ ColCount: colCount,
}, nil
}
diff --git a/compressor/compressor.go b/compressor/compressor.go
new file mode 100644
index 0000000..cdfeacb
--- /dev/null
+++ b/compressor/compressor.go
@@ -0,0 +1,214 @@
+package compressor
+
+import (
+ "encoding/binary"
+ "fmt"
+ "sort"
+)
+
+type OriginalTable struct {
+ entries []int
+ rowCount int
+ colCount int
+}
+
+func NewOriginalTable(entries []int, colCount int) (*OriginalTable, error) {
+ if len(entries) == 0 {
+ return nil, fmt.Errorf("enries is empty")
+ }
+ if colCount <= 0 {
+ return nil, fmt.Errorf("colCount must be >=1")
+ }
+ if len(entries)%colCount != 0 {
+ return nil, fmt.Errorf("entries length or column count are incorrect; entries length: %v, column count: %v", len(entries), colCount)
+ }
+
+ return &OriginalTable{
+ entries: entries,
+ rowCount: len(entries) / colCount,
+ colCount: colCount,
+ }, nil
+}
+
+type Compressor interface {
+ Compress(orig *OriginalTable) error
+ Lookup(row, col int) (int, error)
+ OriginalTableSize() (int, int)
+}
+
+var (
+ _ Compressor = &UniqueEntriesTable{}
+ _ Compressor = &RowDisplacementTable{}
+)
+
+type UniqueEntriesTable struct {
+ UniqueEntries []int
+ RowNums []int
+ OriginalRowCount int
+ OriginalColCount int
+}
+
+func NewUniqueEntriesTable() *UniqueEntriesTable {
+ return &UniqueEntriesTable{}
+}
+
+func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
+ if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
+ return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
+ }
+ return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
+}
+
+func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
+ return tab.OriginalRowCount, tab.OriginalColCount
+}
+
+func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
+ var uniqueEntries []int
+ rowNums := make([]int, orig.rowCount)
+ hash2RowNum := map[string]int{}
+ nextRowNum := 0
+ for row := 0; row < orig.rowCount; row++ {
+ var rowHash string
+ {
+ buf := make([]byte, 0, orig.colCount*8)
+ for col := 0; col < orig.colCount; col++ {
+ b := make([]byte, 8)
+ binary.PutUvarint(b, uint64(orig.entries[row*orig.colCount+col]))
+ buf = append(buf, b...)
+ }
+ rowHash = string(buf)
+ }
+ rowNum, ok := hash2RowNum[rowHash]
+ if !ok {
+ rowNum = nextRowNum
+ nextRowNum++
+ hash2RowNum[rowHash] = rowNum
+ start := row * orig.colCount
+ entry := append([]int{}, orig.entries[start:start+orig.colCount]...)
+ uniqueEntries = append(uniqueEntries, entry...)
+ }
+ rowNums[row] = rowNum
+ }
+
+ tab.UniqueEntries = uniqueEntries
+ tab.RowNums = rowNums
+ tab.OriginalRowCount = orig.rowCount
+ tab.OriginalColCount = orig.colCount
+
+ return nil
+}
+
+const ForbiddenValue = -1
+
+type RowDisplacementTable struct {
+ OriginalRowCount int
+ OriginalColCount int
+ EmptyValue int
+ Entries []int
+ Bounds []int
+ RowDisplacement []int
+}
+
+func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
+ return &RowDisplacementTable{
+ EmptyValue: emptyValue,
+ }
+}
+
+func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
+ if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
+ return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
+ }
+ d := tab.RowDisplacement[row]
+ if tab.Bounds[d+col] != row {
+ return tab.EmptyValue, nil
+ }
+ return tab.Entries[d+col], nil
+}
+
+func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
+ return tab.OriginalRowCount, tab.OriginalColCount
+}
+
+type rowInfo struct {
+ rowNum int
+ nonEmptyCount int
+ nonEmptyCol []int
+}
+
+func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
+ rowInfo := make([]rowInfo, orig.rowCount)
+ {
+ row := 0
+ col := 0
+ rowInfo[0].rowNum = 0
+ for _, v := range orig.entries {
+ if col == orig.colCount {
+ row++
+ col = 0
+ rowInfo[row].rowNum = row
+ }
+ if v != tab.EmptyValue {
+ rowInfo[row].nonEmptyCount++
+ rowInfo[row].nonEmptyCol = append(rowInfo[row].nonEmptyCol, col)
+ }
+ col++
+ }
+
+ sort.SliceStable(rowInfo, func(i int, j int) bool {
+ return rowInfo[i].nonEmptyCount > rowInfo[j].nonEmptyCount
+ })
+ }
+
+ origEntriesLen := len(orig.entries)
+ entries := make([]int, origEntriesLen)
+ bounds := make([]int, origEntriesLen)
+ resultBottom := orig.colCount
+ rowDisplacement := make([]int, orig.rowCount)
+ {
+ for i := 0; i < origEntriesLen; i++ {
+ entries[i] = tab.EmptyValue
+ bounds[i] = ForbiddenValue
+ }
+
+ nextRowDisplacement := 0
+ for _, rInfo := range rowInfo {
+ if rInfo.nonEmptyCount <= 0 {
+ continue
+ }
+
+ for {
+ isOverlapped := false
+ for _, col := range rInfo.nonEmptyCol {
+ if entries[nextRowDisplacement+col] == tab.EmptyValue {
+ continue
+ }
+ nextRowDisplacement++
+ isOverlapped = true
+ break
+ }
+ if isOverlapped {
+ continue
+ }
+
+ rowDisplacement[rInfo.rowNum] = nextRowDisplacement
+ for _, col := range rInfo.nonEmptyCol {
+ entries[nextRowDisplacement+col] = orig.entries[(rInfo.rowNum*orig.colCount)+col]
+ bounds[nextRowDisplacement+col] = rInfo.rowNum
+ }
+ resultBottom = nextRowDisplacement + orig.colCount
+ nextRowDisplacement++
+ break
+ }
+ }
+ }
+
+ tab.OriginalRowCount = orig.rowCount
+ tab.OriginalColCount = orig.colCount
+ tab.Entries = entries[:resultBottom]
+ tab.Bounds = bounds[:resultBottom]
+ tab.RowDisplacement = rowDisplacement
+
+ return nil
+}
diff --git a/compressor/compressor_test.go b/compressor/compressor_test.go
new file mode 100644
index 0000000..621b731
--- /dev/null
+++ b/compressor/compressor_test.go
@@ -0,0 +1,122 @@
+package compressor
+
+import (
+ "fmt"
+ "testing"
+)
+
+func TestCompressor_Compress(t *testing.T) {
+ x := 0 // an empty value
+
+ allCompressors := func() []Compressor {
+ return []Compressor{
+ NewUniqueEntriesTable(),
+ NewRowDisplacementTable(x),
+ }
+ }
+
+ tests := []struct {
+ original []int
+ rowCount int
+ colCount int
+ compressors []Compressor
+ }{
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ x, x, x, x, x,
+ x, x, x, x, x,
+ x, x, x, x, x,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, 1, 1, 1, 1,
+ x, x, x, x, x,
+ 1, 1, 1, 1, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ {
+ original: []int{
+ 1, x, 1, 1, 1,
+ 1, 1, x, 1, 1,
+ 1, 1, 1, x, 1,
+ },
+ rowCount: 3,
+ colCount: 5,
+ compressors: allCompressors(),
+ },
+ }
+ for i, tt := range tests {
+ for _, comp := range tt.compressors {
+ t.Run(fmt.Sprintf("%T #%v", comp, i), func(t *testing.T) {
+ dup := make([]int, len(tt.original))
+ copy(dup, tt.original)
+
+ orig, err := NewOriginalTable(tt.original, tt.colCount)
+ if err != nil {
+ t.Fatal(err)
+ }
+ err = comp.Compress(orig)
+ if err != nil {
+ t.Fatal(err)
+ }
+ rowCount, colCount := comp.OriginalTableSize()
+ if rowCount != tt.rowCount || colCount != tt.colCount {
+ t.Fatalf("unexpected table size; want: %vx%v, got: %vx%v", tt.rowCount, tt.colCount, rowCount, colCount)
+ }
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ v, err := comp.Lookup(i, j)
+ if err != nil {
+ t.Fatal(err)
+ }
+ expected := tt.original[i*tt.colCount+j]
+ if v != expected {
+ t.Fatalf("unexpected entry (%v, %v); want: %v, got: %v", i, j, expected, v)
+ }
+ }
+ }
+
+ // Calling with out-of-range indexes should be an error.
+ if _, err := comp.Lookup(0, -1); err == nil {
+ t.Fatalf("expected error didn't occur (0, -1)")
+ }
+ if _, err := comp.Lookup(-1, 0); err == nil {
+ t.Fatalf("expected error didn't occur (-1, 0)")
+ }
+ if _, err := comp.Lookup(rowCount-1, colCount); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount-1, colCount)
+ }
+ if _, err := comp.Lookup(rowCount, colCount-1); err == nil {
+ t.Fatalf("expected error didn't occur (%v, %v)", rowCount, colCount-1)
+ }
+
+ // The compressor must not break the original table.
+ for i := 0; i < tt.rowCount; i++ {
+ for j := 0; j < tt.colCount; j++ {
+ idx := i*tt.colCount + j
+ if tt.original[idx] != dup[idx] {
+ t.Fatalf("the original table is broken (%v, %v); want: %v, got: %v", i, j, dup[idx], tt.original[idx])
+ }
+ }
+ }
+ })
+ }
+ }
+}
diff --git a/driver/lexer.go b/driver/lexer.go
index 486ff7d..485bfd9 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -255,12 +255,8 @@ func (l *lexer) next() (*Token, error) {
}
buf = append(buf, v)
unfixedBufLen++
- entry := spec.DFA.Transition[state]
- if len(entry) == 0 {
- return nil, fmt.Errorf("no transition entry; state: %v", state)
- }
- nextState := entry[v]
- if nextState == 0 {
+ nextState, ok := l.lookupNextState(mode, state, int(v))
+ if !ok {
if tok != nil {
l.unread(unfixedBufLen)
return tok, nil
@@ -276,6 +272,16 @@ func (l *lexer) next() (*Token, error) {
}
}
+func (l *lexer) lookupNextState(mode spec.LexModeNum, state int, v int) (int, bool) {
+ tab := l.clspec.Specs[mode].DFA.Transition
+ rowNum := tab.RowNums[state]
+ d := tab.UniqueEntries.RowDisplacement[rowNum]
+ if tab.UniqueEntries.Bounds[d+v] != rowNum {
+ return tab.UniqueEntries.EmptyValue, false
+ }
+ return tab.UniqueEntries.Entries[d+v], true
+}
+
func (l *lexer) mode() spec.LexModeNum {
return l.modeStack[len(l.modeStack)-1]
}
diff --git a/spec/spec.go b/spec/spec.go
index e2291e9..1dba132 100644
--- a/spec/spec.go
+++ b/spec/spec.go
@@ -152,10 +152,30 @@ func (s *LexSpec) Validate() error {
return nil
}
+type RowDisplacementTable struct {
+ OriginalRowCount int `json:"original_row_count"`
+ OriginalColCount int `json:"original_col_count"`
+ EmptyValue int `json:"empty_value"`
+ Entries []int `json:"entries"`
+ Bounds []int `json:"bounds"`
+ RowDisplacement []int `json:"row_displacement"`
+}
+
+type UniqueEntriesTable struct {
+ UniqueEntries *RowDisplacementTable `json:"unique_entries"`
+ RowNums []int `json:"row_nums"`
+ OriginalRowCount int `json:"original_row_count"`
+ OriginalColCount int `json:"original_col_count"`
+ EmptyValue int `json:"empty_value"`
+}
+
type TransitionTable struct {
- InitialState int `json:"initial_state"`
- AcceptingStates map[int]int `json:"accepting_states"`
- Transition [][]int `json:"transition"`
+ InitialState int `json:"initial_state"`
+ AcceptingStates map[int]int `json:"accepting_states"`
+ RowCount int `json:"row_count"`
+ ColCount int `json:"col_count"`
+ Transition *UniqueEntriesTable `json:"transition,omitempty"`
+ UncompressedTransition []int `json:"uncompressed_transition,omitempty"`
}
type CompiledLexModeSpec struct {