aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--spec/spec.go382
-rw-r--r--spec/spec_test.go219
-rw-r--r--spec/util.go21
-rw-r--r--src/tre.go421
-rw-r--r--tests/tre.go223
5 files changed, 628 insertions, 638 deletions
diff --git a/spec/spec.go b/spec/spec.go
deleted file mode 100644
index 28d5abc..0000000
--- a/spec/spec.go
+++ /dev/null
@@ -1,382 +0,0 @@
-package spec
-
-import (
- "fmt"
- "regexp"
- "sort"
- "strconv"
- "strings"
-)
-
-// LexKindID represents an ID of a lexical kind and is unique across all modes.
-type LexKindID int
-
-const (
- LexKindIDNil = LexKindID(0)
- LexKindIDMin = LexKindID(1)
-)
-
-func (id LexKindID) Int() int {
- return int(id)
-}
-
-// LexModeKindID represents an ID of a lexical kind and is unique within a mode.
-// Use LexKindID to identify a kind across all modes uniquely.
-type LexModeKindID int
-
-const (
- LexModeKindIDNil = LexModeKindID(0)
- LexModeKindIDMin = LexModeKindID(1)
-)
-
-func (id LexModeKindID) Int() int {
- return int(id)
-}
-
-// LexKindName represents a name of a lexical kind.
-type LexKindName string
-
-const LexKindNameNil = LexKindName("")
-
-func (k LexKindName) String() string {
- return string(k)
-}
-
-func (k LexKindName) validate() error {
- err := validateIdentifier(k.String())
- if err != nil {
- return fmt.Errorf("invalid kind name: %v", err)
- }
- return nil
-}
-
-// LexPattern represents a pattern of a lexeme.
-// The pattern is written in regular expression.
-type LexPattern string
-
-func (p LexPattern) validate() error {
- if p == "" {
- return fmt.Errorf("pattern doesn't allow to be the empty string")
- }
- return nil
-}
-
-// LexModeID represents an ID of a lex mode.
-type LexModeID int
-
-const (
- LexModeIDNil = LexModeID(0)
- LexModeIDDefault = LexModeID(1)
-)
-
-func (n LexModeID) String() string {
- return strconv.Itoa(int(n))
-}
-
-func (n LexModeID) Int() int {
- return int(n)
-}
-
-func (n LexModeID) IsNil() bool {
- return n == LexModeIDNil
-}
-
-// LexModeName represents a name of a lex mode.
-type LexModeName string
-
-const (
- LexModeNameNil = LexModeName("")
- LexModeNameDefault = LexModeName("default")
-)
-
-func (m LexModeName) String() string {
- return string(m)
-}
-
-func (m LexModeName) validate() error {
- err := validateIdentifier(m.String())
- if err != nil {
- return fmt.Errorf("invalid mode name: %v", err)
- }
- return nil
-}
-
-const idPattern = `^[a-z](_?[0-9a-z]+)*$`
-
-var idRE = regexp.MustCompile(idPattern)
-
-func validateIdentifier(id string) error {
- if id == "" {
- return fmt.Errorf("identifier doesn't allow to be the empty string")
- }
- if !idRE.MatchString(id) {
- return fmt.Errorf("identifier must be %v", idPattern)
- }
- return nil
-}
-
-func SnakeCaseToUpperCamelCase(snake string) string {
- elems := strings.Split(snake, "_")
- for i, e := range elems {
- if len(e) == 0 {
- continue
- }
- elems[i] = strings.ToUpper(string(e[0])) + e[1:]
- }
-
- return strings.Join(elems, "")
-}
-
-type LexEntry struct {
- Kind LexKindName `json:"kind"`
- Pattern LexPattern `json:"pattern"`
- Modes []LexModeName `json:"modes"`
- Push LexModeName `json:"push"`
- Pop bool `json:"pop"`
- Fragment bool `json:"fragment"`
-}
-
-func (e *LexEntry) validate() error {
- err := e.Kind.validate()
- if err != nil {
- return err
- }
- err = e.Pattern.validate()
- if err != nil {
- return err
- }
- if len(e.Modes) > 0 {
- for _, mode := range e.Modes {
- err = mode.validate()
- if err != nil {
- return err
- }
- }
- }
- return nil
-}
-
-type LexSpec struct {
- Name string `json:"name"`
- Entries []*LexEntry `json:"entries"`
-}
-
-func (s *LexSpec) Validate() error {
- err := validateIdentifier(s.Name)
- if err != nil {
- return fmt.Errorf("invalid specification name: %v", err)
- }
-
- if len(s.Entries) <= 0 {
- return fmt.Errorf("the lexical specification must have at least one entry")
- }
- {
- var errs []error
- for i, e := range s.Entries {
- err := e.validate()
- if err != nil {
- errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err))
- }
- }
- if len(errs) > 0 {
- var b strings.Builder
- fmt.Fprintf(&b, "%v", errs[0])
- for _, err := range errs[1:] {
- fmt.Fprintf(&b, "\n%v", err)
- }
- return fmt.Errorf(b.String())
- }
- }
- {
- ks := map[string]struct{}{}
- fks := map[string]struct{}{}
- for _, e := range s.Entries {
- // Allow duplicate names between fragments and non-fragments.
- if e.Fragment {
- if _, exist := fks[e.Kind.String()]; exist {
- return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
- }
- fks[e.Kind.String()] = struct{}{}
- } else {
- if _, exist := ks[e.Kind.String()]; exist {
- return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
- }
- ks[e.Kind.String()] = struct{}{}
- }
- }
- }
- {
- kinds := []string{}
- modes := []string{
- LexModeNameDefault.String(), // This is a predefined mode.
- }
- for _, e := range s.Entries {
- if e.Fragment {
- continue
- }
-
- kinds = append(kinds, e.Kind.String())
-
- for _, m := range e.Modes {
- modes = append(modes, m.String())
- }
- }
-
- kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
- modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
- if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) {
- var b strings.Builder
- fmt.Fprintf(&b, "%+v", ids[0])
- for _, id := range ids[1:] {
- fmt.Fprintf(&b, ", %+v", id)
- }
- return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String())
- }
- return nil
- })
- errs := append(kindErrs, modeErrs...)
- if len(errs) > 0 {
- var b strings.Builder
- fmt.Fprintf(&b, "%v", errs[0])
- for _, err := range errs[1:] {
- fmt.Fprintf(&b, "\n%v", err)
- }
- return fmt.Errorf(b.String())
- }
- }
-
- return nil
-}
-
-func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
- duplicated := FindSpellingInconsistencies(ids)
- if len(duplicated) == 0 {
- return nil
- }
-
- var errs []error
- for _, dup := range duplicated {
- if hook != nil {
- err := hook(dup)
- if err != nil {
- errs = append(errs, err)
- continue
- }
- }
-
- var b strings.Builder
- fmt.Fprintf(&b, "%+v", dup[0])
- for _, id := range dup[1:] {
- fmt.Fprintf(&b, ", %+v", id)
- }
- err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
- errs = append(errs, err)
- }
-
- return errs
-}
-
-// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
-// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
-// in UpperCamelCase. Thus they are considere to be spelling inconsistency.
-func FindSpellingInconsistencies(ids []string) [][]string {
- m := map[string][]string{}
- for _, id := range removeDuplicates(ids) {
- c := SnakeCaseToUpperCamelCase(id)
- m[c] = append(m[c], id)
- }
-
- var duplicated [][]string
- for _, camels := range m {
- if len(camels) == 1 {
- continue
- }
- duplicated = append(duplicated, camels)
- }
-
- for _, dup := range duplicated {
- sort.Slice(dup, func(i, j int) bool {
- return dup[i] < dup[j]
- })
- }
- sort.Slice(duplicated, func(i, j int) bool {
- return duplicated[i][0] < duplicated[j][0]
- })
-
- return duplicated
-}
-
-func removeDuplicates(s []string) []string {
- m := map[string]struct{}{}
- for _, v := range s {
- m[v] = struct{}{}
- }
-
- var unique []string
- for v := range m {
- unique = append(unique, v)
- }
-
- return unique
-}
-
-// StateID represents an ID of a state of a transition table.
-type StateID int
-
-const (
- // StateIDNil represents an empty entry of a transition table.
- // When the driver reads this value, it raises an error meaning lexical analysis failed.
- StateIDNil = StateID(0)
-
- // StateIDMin is the minimum value of the state ID. All valid state IDs are represented as
- // sequential numbers starting from this value.
- StateIDMin = StateID(1)
-)
-
-func (id StateID) Int() int {
- return int(id)
-}
-
-type RowDisplacementTable struct {
- OriginalRowCount int `json:"original_row_count"`
- OriginalColCount int `json:"original_col_count"`
- EmptyValue StateID `json:"empty_value"`
- Entries []StateID `json:"entries"`
- Bounds []int `json:"bounds"`
- RowDisplacement []int `json:"row_displacement"`
-}
-
-type UniqueEntriesTable struct {
- UniqueEntries *RowDisplacementTable `json:"unique_entries,omitempty"`
- UncompressedUniqueEntries []StateID `json:"uncompressed_unique_entries,omitempty"`
- RowNums []int `json:"row_nums"`
- OriginalRowCount int `json:"original_row_count"`
- OriginalColCount int `json:"original_col_count"`
- EmptyValue int `json:"empty_value"`
-}
-
-type TransitionTable struct {
- InitialStateID StateID `json:"initial_state_id"`
- AcceptingStates []LexModeKindID `json:"accepting_states"`
- RowCount int `json:"row_count"`
- ColCount int `json:"col_count"`
- Transition *UniqueEntriesTable `json:"transition,omitempty"`
- UncompressedTransition []StateID `json:"uncompressed_transition,omitempty"`
-}
-
-type CompiledLexModeSpec struct {
- KindNames []LexKindName `json:"kind_names"`
- Push []LexModeID `json:"push"`
- Pop []int `json:"pop"`
- DFA *TransitionTable `json:"dfa"`
-}
-
-type CompiledLexSpec struct {
- Name string `json:"name"`
- InitialModeID LexModeID `json:"initial_mode_id"`
- ModeNames []LexModeName `json:"mode_names"`
- KindNames []LexKindName `json:"kind_names"`
- KindIDs [][]LexKindID `json:"kind_ids"`
- CompressionLevel int `json:"compression_level"`
- Specs []*CompiledLexModeSpec `json:"specs"`
-}
diff --git a/spec/spec_test.go b/spec/spec_test.go
deleted file mode 100644
index e0e920e..0000000
--- a/spec/spec_test.go
+++ /dev/null
@@ -1,219 +0,0 @@
-package spec
-
-import (
- "fmt"
- "testing"
-)
-
-var idTests = []struct {
- id string
- invalid bool
-}{
- {
- id: "foo",
- },
- {
- id: "foo2",
- },
- {
- id: "foo_bar_baz",
- },
- {
- id: "f_o_o",
- },
- {
- id: "Foo",
- invalid: true,
- },
- {
- id: "foo_Bar",
- invalid: true,
- },
- {
- id: "2foo",
- invalid: true,
- },
- {
- id: "_foo",
- invalid: true,
- },
- {
- id: "foo_",
- invalid: true,
- },
- {
- id: "foo__bar",
- invalid: true,
- },
-}
-
-func TestValidateIdentifier(t *testing.T) {
- for _, tt := range idTests {
- t.Run(tt.id, func(t *testing.T) {
- err := validateIdentifier(tt.id)
- if tt.invalid {
- if err == nil {
- t.Errorf("expected error didn't occur")
- }
- } else {
- if err != nil {
- t.Errorf("unexpected error occurred: %v", err)
- }
- }
- })
- }
-}
-
-func TestLexKindName_validate(t *testing.T) {
- for _, tt := range idTests {
- t.Run(tt.id, func(t *testing.T) {
- err := LexKindName(tt.id).validate()
- if tt.invalid {
- if err == nil {
- t.Errorf("expected error didn't occur")
- }
- } else {
- if err != nil {
- t.Errorf("unexpected error occurred: %v", err)
- }
- }
- })
- }
-}
-
-func TestLexModeName_validate(t *testing.T) {
- for _, tt := range idTests {
- t.Run(tt.id, func(t *testing.T) {
- err := LexModeName(tt.id).validate()
- if tt.invalid {
- if err == nil {
- t.Errorf("expected error didn't occur")
- }
- } else {
- if err != nil {
- t.Errorf("unexpected error occurred: %v", err)
- }
- }
- })
- }
-}
-
-func TestSnakeCaseToUpperCamelCase(t *testing.T) {
- tests := []struct {
- snake string
- camel string
- }{
- {
- snake: "foo",
- camel: "Foo",
- },
- {
- snake: "foo_bar",
- camel: "FooBar",
- },
- {
- snake: "foo_bar_baz",
- camel: "FooBarBaz",
- },
- {
- snake: "Foo",
- camel: "Foo",
- },
- {
- snake: "fooBar",
- camel: "FooBar",
- },
- {
- snake: "FOO",
- camel: "FOO",
- },
- {
- snake: "FOO_BAR",
- camel: "FOOBAR",
- },
- {
- snake: "_foo_bar_",
- camel: "FooBar",
- },
- {
- snake: "___foo___bar___",
- camel: "FooBar",
- },
- }
- for _, tt := range tests {
- c := SnakeCaseToUpperCamelCase(tt.snake)
- if c != tt.camel {
- t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
- }
- }
-}
-
-func TestFindSpellingInconsistencies(t *testing.T) {
- tests := []struct {
- ids []string
- duplicated [][]string
- }{
- {
- ids: []string{"foo", "foo"},
- duplicated: nil,
- },
- {
- ids: []string{"foo", "Foo"},
- duplicated: [][]string{{"Foo", "foo"}},
- },
- {
- ids: []string{"foo", "foo", "Foo"},
- duplicated: [][]string{{"Foo", "foo"}},
- },
- {
- ids: []string{"foo_bar_baz", "FooBarBaz"},
- duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}},
- },
- {
- ids: []string{"foo", "Foo", "bar", "Bar"},
- duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
- },
- {
- ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"},
- duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
- },
- }
- for i, tt := range tests {
- t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
- duplicated := FindSpellingInconsistencies(tt.ids)
- if len(duplicated) != len(tt.duplicated) {
- t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated)
- }
- for i, dupIDs := range duplicated {
- if len(dupIDs) != len(tt.duplicated[i]) {
- t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
- }
- for j, id := range dupIDs {
- if id != tt.duplicated[i][j] {
- t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
- }
- }
- }
- })
- }
-}
-
-func TestLexSpec_Validate(t *testing.T) {
- // We expect that the spelling inconsistency error will occur.
- spec := &LexSpec{
- Entries: []*LexEntry{
- {
- Modes: []LexModeName{
- // 'Default' is the spelling inconsistency because 'default' is predefined.
- "Default",
- },
- Kind: "foo",
- Pattern: "foo",
- },
- },
- }
- err := spec.Validate()
- if err == nil {
- t.Fatalf("expected error didn't occur")
- }
-}
diff --git a/spec/util.go b/spec/util.go
deleted file mode 100644
index 56e9121..0000000
--- a/spec/util.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package spec
-
-import "strings"
-
-var rep = strings.NewReplacer(
- `.`, `\.`,
- `*`, `\*`,
- `+`, `\+`,
- `?`, `\?`,
- `|`, `\|`,
- `(`, `\(`,
- `)`, `\)`,
- `[`, `\[`,
- `\`, `\\`,
-)
-
-// EscapePattern escapes the special characters.
-// For example, EscapePattern(`+`) returns `\+`.
-func EscapePattern(s string) string {
- return rep.Replace(s)
-}
diff --git a/src/tre.go b/src/tre.go
index cab7784..2350a52 100644
--- a/src/tre.go
+++ b/src/tre.go
@@ -3,6 +3,8 @@ package tre
import (
"encoding/binary"
"fmt"
+ "regexp"
+ "strconv"
"strings"
"sort"
)
@@ -174,33 +176,33 @@ type Compressor interface {
}
var (
- _ Compressor = &UniqueEntriesTable{}
- _ Compressor = &RowDisplacementTable{}
+ _ Compressor = &CompressorUniqueEntriesTable{}
+ _ Compressor = &CompressorRowDisplacementTable{}
)
-type UniqueEntriesTable struct {
+type CompressorUniqueEntriesTable struct {
UniqueEntries []int
RowNums []int
OriginalRowCount int
OriginalColCount int
}
-func NewUniqueEntriesTable() *UniqueEntriesTable {
- return &UniqueEntriesTable{}
+func NewCompressorUniqueEntriesTable() *CompressorUniqueEntriesTable {
+ return &CompressorUniqueEntriesTable{}
}
-func (tab *UniqueEntriesTable) Lookup(row, col int) (int, error) {
+func (tab *CompressorUniqueEntriesTable) Lookup(row, col int) (int, error) {
if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
return 0, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
}
return tab.UniqueEntries[tab.RowNums[row]*tab.OriginalColCount+col], nil
}
-func (tab *UniqueEntriesTable) OriginalTableSize() (int, int) {
+func (tab *CompressorUniqueEntriesTable) OriginalTableSize() (int, int) {
return tab.OriginalRowCount, tab.OriginalColCount
}
-func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
+func (tab *CompressorUniqueEntriesTable) Compress(orig *OriginalTable) error {
var uniqueEntries []int
rowNums := make([]int, orig.rowCount)
hash2RowNum := map[string]int{}
@@ -238,7 +240,7 @@ func (tab *UniqueEntriesTable) Compress(orig *OriginalTable) error {
const ForbiddenValue = -1
-type RowDisplacementTable struct {
+type CompressorRowDisplacementTable struct {
OriginalRowCount int
OriginalColCount int
EmptyValue int
@@ -247,13 +249,13 @@ type RowDisplacementTable struct {
RowDisplacement []int
}
-func NewRowDisplacementTable(emptyValue int) *RowDisplacementTable {
- return &RowDisplacementTable{
+func NewCompressorRowDisplacementTable(emptyValue int) *CompressorRowDisplacementTable {
+ return &CompressorRowDisplacementTable{
EmptyValue: emptyValue,
}
}
-func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
+func (tab *CompressorRowDisplacementTable) Lookup(row int, col int) (int, error) {
if row < 0 || row >= tab.OriginalRowCount || col < 0 || col >= tab.OriginalColCount {
return tab.EmptyValue, fmt.Errorf("indexes are out of range: [%v, %v]", row, col)
}
@@ -264,7 +266,7 @@ func (tab *RowDisplacementTable) Lookup(row int, col int) (int, error) {
return tab.Entries[d+col], nil
}
-func (tab *RowDisplacementTable) OriginalTableSize() (int, int) {
+func (tab *CompressorRowDisplacementTable) OriginalTableSize() (int, int) {
return tab.OriginalRowCount, tab.OriginalColCount
}
@@ -274,7 +276,7 @@ type rowInfo struct {
nonEmptyCol []int
}
-func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
+func (tab *CompressorRowDisplacementTable) Compress(orig *OriginalTable) error {
rowInfo := make([]rowInfo, orig.rowCount)
{
row := 0
@@ -354,3 +356,394 @@ func (tab *RowDisplacementTable) Compress(orig *OriginalTable) error {
func Main() {
}
+
+var rep = strings.NewReplacer(
+ `.`, `\.`,
+ `*`, `\*`,
+ `+`, `\+`,
+ `?`, `\?`,
+ `|`, `\|`,
+ `(`, `\(`,
+ `)`, `\)`,
+ `[`, `\[`,
+ `\`, `\\`,
+)
+
+// EscapePattern escapes the special characters.
+// For example, EscapePattern(`+`) returns `\+`.
+func EscapePattern(s string) string {
+ return rep.Replace(s)
+}
+
+// LexKindID represents an ID of a lexical kind and is unique across all modes.
+type LexKindID int
+
+const (
+ LexKindIDNil = LexKindID(0)
+ LexKindIDMin = LexKindID(1)
+)
+
+func (id LexKindID) Int() int {
+ return int(id)
+}
+
+// LexModeKindID represents an ID of a lexical kind and is unique within a mode.
+// Use LexKindID to identify a kind across all modes uniquely.
+type LexModeKindID int
+
+const (
+ LexModeKindIDNil = LexModeKindID(0)
+ LexModeKindIDMin = LexModeKindID(1)
+)
+
+func (id LexModeKindID) Int() int {
+ return int(id)
+}
+
+// LexKindName represents a name of a lexical kind.
+type LexKindName string
+
+const LexKindNameNil = LexKindName("")
+
+func (k LexKindName) String() string {
+ return string(k)
+}
+
+func (k LexKindName) validate() error {
+ err := validateIdentifier(k.String())
+ if err != nil {
+ return fmt.Errorf("invalid kind name: %v", err)
+ }
+ return nil
+}
+
+// LexPattern represents a pattern of a lexeme.
+// The pattern is written in regular expression.
+type LexPattern string
+
+func (p LexPattern) validate() error {
+ if p == "" {
+ return fmt.Errorf("pattern doesn't allow to be the empty string")
+ }
+ return nil
+}
+
+// LexModeID represents an ID of a lex mode.
+type LexModeID int
+
+const (
+ LexModeIDNil = LexModeID(0)
+ LexModeIDDefault = LexModeID(1)
+)
+
+func (n LexModeID) String() string {
+ return strconv.Itoa(int(n))
+}
+
+func (n LexModeID) Int() int {
+ return int(n)
+}
+
+func (n LexModeID) IsNil() bool {
+ return n == LexModeIDNil
+}
+
+// LexModeName represents a name of a lex mode.
+type LexModeName string
+
+const (
+ LexModeNameNil = LexModeName("")
+ LexModeNameDefault = LexModeName("default")
+)
+
+func (m LexModeName) String() string {
+ return string(m)
+}
+
+func (m LexModeName) validate() error {
+ err := validateIdentifier(m.String())
+ if err != nil {
+ return fmt.Errorf("invalid mode name: %v", err)
+ }
+ return nil
+}
+
+const idPattern = `^[a-z](_?[0-9a-z]+)*$`
+
+var idRE = regexp.MustCompile(idPattern)
+
+func validateIdentifier(id string) error {
+ if id == "" {
+ return fmt.Errorf("identifier doesn't allow to be the empty string")
+ }
+ if !idRE.MatchString(id) {
+ return fmt.Errorf("identifier must be %v", idPattern)
+ }
+ return nil
+}
+
+func SnakeCaseToUpperCamelCase(snake string) string {
+ elems := strings.Split(snake, "_")
+ for i, e := range elems {
+ if len(e) == 0 {
+ continue
+ }
+ elems[i] = strings.ToUpper(string(e[0])) + e[1:]
+ }
+
+ return strings.Join(elems, "")
+}
+
+type LexEntry struct {
+ Kind LexKindName `json:"kind"`
+ Pattern LexPattern `json:"pattern"`
+ Modes []LexModeName `json:"modes"`
+ Push LexModeName `json:"push"`
+ Pop bool `json:"pop"`
+ Fragment bool `json:"fragment"`
+}
+
+func (e *LexEntry) validate() error {
+ err := e.Kind.validate()
+ if err != nil {
+ return err
+ }
+ err = e.Pattern.validate()
+ if err != nil {
+ return err
+ }
+ if len(e.Modes) > 0 {
+ for _, mode := range e.Modes {
+ err = mode.validate()
+ if err != nil {
+ return err
+ }
+ }
+ }
+ return nil
+}
+
+type LexSpec struct {
+ Name string `json:"name"`
+ Entries []*LexEntry `json:"entries"`
+}
+
+func (s *LexSpec) Validate() error {
+ err := validateIdentifier(s.Name)
+ if err != nil {
+ return fmt.Errorf("invalid specification name: %v", err)
+ }
+
+ if len(s.Entries) <= 0 {
+ return fmt.Errorf("the lexical specification must have at least one entry")
+ }
+ {
+ var errs []error
+ for i, e := range s.Entries {
+ err := e.validate()
+ if err != nil {
+ errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err))
+ }
+ }
+ if len(errs) > 0 {
+ var b strings.Builder
+ fmt.Fprintf(&b, "%v", errs[0])
+ for _, err := range errs[1:] {
+ fmt.Fprintf(&b, "\n%v", err)
+ }
+ return fmt.Errorf(b.String())
+ }
+ }
+ {
+ ks := map[string]struct{}{}
+ fks := map[string]struct{}{}
+ for _, e := range s.Entries {
+ // Allow duplicate names between fragments and non-fragments.
+ if e.Fragment {
+ if _, exist := fks[e.Kind.String()]; exist {
+ return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
+ }
+ fks[e.Kind.String()] = struct{}{}
+ } else {
+ if _, exist := ks[e.Kind.String()]; exist {
+ return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
+ }
+ ks[e.Kind.String()] = struct{}{}
+ }
+ }
+ }
+ {
+ kinds := []string{}
+ modes := []string{
+ LexModeNameDefault.String(), // This is a predefined mode.
+ }
+ for _, e := range s.Entries {
+ if e.Fragment {
+ continue
+ }
+
+ kinds = append(kinds, e.Kind.String())
+
+ for _, m := range e.Modes {
+ modes = append(modes, m.String())
+ }
+ }
+
+ kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
+ modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
+ if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) {
+ var b strings.Builder
+ fmt.Fprintf(&b, "%+v", ids[0])
+ for _, id := range ids[1:] {
+ fmt.Fprintf(&b, ", %+v", id)
+ }
+ return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String())
+ }
+ return nil
+ })
+ errs := append(kindErrs, modeErrs...)
+ if len(errs) > 0 {
+ var b strings.Builder
+ fmt.Fprintf(&b, "%v", errs[0])
+ for _, err := range errs[1:] {
+ fmt.Fprintf(&b, "\n%v", err)
+ }
+ return fmt.Errorf(b.String())
+ }
+ }
+
+ return nil
+}
+
+func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
+ duplicated := FindSpellingInconsistencies(ids)
+ if len(duplicated) == 0 {
+ return nil
+ }
+
+ var errs []error
+ for _, dup := range duplicated {
+ if hook != nil {
+ err := hook(dup)
+ if err != nil {
+ errs = append(errs, err)
+ continue
+ }
+ }
+
+ var b strings.Builder
+ fmt.Fprintf(&b, "%+v", dup[0])
+ for _, id := range dup[1:] {
+ fmt.Fprintf(&b, ", %+v", id)
+ }
+ err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
+ errs = append(errs, err)
+ }
+
+ return errs
+}
+
+// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
+// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
+// in UpperCamelCase. Thus they are considere to be spelling inconsistency.
+func FindSpellingInconsistencies(ids []string) [][]string {
+ m := map[string][]string{}
+ for _, id := range removeDuplicates(ids) {
+ c := SnakeCaseToUpperCamelCase(id)
+ m[c] = append(m[c], id)
+ }
+
+ var duplicated [][]string
+ for _, camels := range m {
+ if len(camels) == 1 {
+ continue
+ }
+ duplicated = append(duplicated, camels)
+ }
+
+ for _, dup := range duplicated {
+ sort.Slice(dup, func(i, j int) bool {
+ return dup[i] < dup[j]
+ })
+ }
+ sort.Slice(duplicated, func(i, j int) bool {
+ return duplicated[i][0] < duplicated[j][0]
+ })
+
+ return duplicated
+}
+
+func removeDuplicates(s []string) []string {
+ m := map[string]struct{}{}
+ for _, v := range s {
+ m[v] = struct{}{}
+ }
+
+ var unique []string
+ for v := range m {
+ unique = append(unique, v)
+ }
+
+ return unique
+}
+
+// StateID represents an ID of a state of a transition table.
+type StateID int
+
+const (
+ // StateIDNil represents an empty entry of a transition table.
+ // When the driver reads this value, it raises an error meaning lexical analysis failed.
+ StateIDNil = StateID(0)
+
+ // StateIDMin is the minimum value of the state ID. All valid state IDs are represented as
+ // sequential numbers starting from this value.
+ StateIDMin = StateID(1)
+)
+
+func (id StateID) Int() int {
+ return int(id)
+}
+
+type SpecRowDisplacementTable struct {
+ OriginalRowCount int `json:"original_row_count"`
+ OriginalColCount int `json:"original_col_count"`
+ EmptyValue StateID `json:"empty_value"`
+ Entries []StateID `json:"entries"`
+ Bounds []int `json:"bounds"`
+ RowDisplacement []int `json:"row_displacement"`
+}
+
+type SpecUniqueEntriesTable struct {
+ UniqueEntries *SpecRowDisplacementTable `json:"unique_entries,omitempty"`
+ UncompressedUniqueEntries []StateID `json:"uncompressed_unique_entries,omitempty"`
+ RowNums []int `json:"row_nums"`
+ OriginalRowCount int `json:"original_row_count"`
+ OriginalColCount int `json:"original_col_count"`
+ EmptyValue int `json:"empty_value"`
+}
+
+type TransitionTable struct {
+ InitialStateID StateID `json:"initial_state_id"`
+ AcceptingStates []LexModeKindID `json:"accepting_states"`
+ RowCount int `json:"row_count"`
+ ColCount int `json:"col_count"`
+ Transition *SpecUniqueEntriesTable `json:"transition,omitempty"`
+ UncompressedTransition []StateID `json:"uncompressed_transition,omitempty"`
+}
+
+type CompiledLexModeSpec struct {
+ KindNames []LexKindName `json:"kind_names"`
+ Push []LexModeID `json:"push"`
+ Pop []int `json:"pop"`
+ DFA *TransitionTable `json:"dfa"`
+}
+
+type CompiledLexSpec struct {
+ Name string `json:"name"`
+ InitialModeID LexModeID `json:"initial_mode_id"`
+ ModeNames []LexModeName `json:"mode_names"`
+ KindNames []LexKindName `json:"kind_names"`
+ KindIDs [][]LexKindID `json:"kind_ids"`
+ CompressionLevel int `json:"compression_level"`
+ Specs []*CompiledLexModeSpec `json:"specs"`
+}
diff --git a/tests/tre.go b/tests/tre.go
index 9029259..8c14feb 100644
--- a/tests/tre.go
+++ b/tests/tre.go
@@ -245,8 +245,8 @@ func TestCompressor_Compress(t *testing.T) {
allCompressors := func() []Compressor {
return []Compressor{
- NewUniqueEntriesTable(),
- NewRowDisplacementTable(x),
+ NewCompressorUniqueEntriesTable(),
+ NewCompressorRowDisplacementTable(x),
}
}
@@ -356,6 +356,219 @@ func TestCompressor_Compress(t *testing.T) {
}
}
+var idTests = []struct {
+ id string
+ invalid bool
+}{
+ {
+ id: "foo",
+ },
+ {
+ id: "foo2",
+ },
+ {
+ id: "foo_bar_baz",
+ },
+ {
+ id: "f_o_o",
+ },
+ {
+ id: "Foo",
+ invalid: true,
+ },
+ {
+ id: "foo_Bar",
+ invalid: true,
+ },
+ {
+ id: "2foo",
+ invalid: true,
+ },
+ {
+ id: "_foo",
+ invalid: true,
+ },
+ {
+ id: "foo_",
+ invalid: true,
+ },
+ {
+ id: "foo__bar",
+ invalid: true,
+ },
+}
+
+func TestValidateIdentifier(t *testing.T) {
+ for _, tt := range idTests {
+ t.Run(tt.id, func(t *testing.T) {
+ err := validateIdentifier(tt.id)
+ if tt.invalid {
+ if err == nil {
+ t.Errorf("expected error didn't occur")
+ }
+ } else {
+ if err != nil {
+ t.Errorf("unexpected error occurred: %v", err)
+ }
+ }
+ })
+ }
+}
+
+func TestLexKindName_validate(t *testing.T) {
+ for _, tt := range idTests {
+ t.Run(tt.id, func(t *testing.T) {
+ err := LexKindName(tt.id).validate()
+ if tt.invalid {
+ if err == nil {
+ t.Errorf("expected error didn't occur")
+ }
+ } else {
+ if err != nil {
+ t.Errorf("unexpected error occurred: %v", err)
+ }
+ }
+ })
+ }
+}
+
+func TestLexModeName_validate(t *testing.T) {
+ for _, tt := range idTests {
+ t.Run(tt.id, func(t *testing.T) {
+ err := LexModeName(tt.id).validate()
+ if tt.invalid {
+ if err == nil {
+ t.Errorf("expected error didn't occur")
+ }
+ } else {
+ if err != nil {
+ t.Errorf("unexpected error occurred: %v", err)
+ }
+ }
+ })
+ }
+}
+
+func TestSnakeCaseToUpperCamelCase(t *testing.T) {
+ tests := []struct {
+ snake string
+ camel string
+ }{
+ {
+ snake: "foo",
+ camel: "Foo",
+ },
+ {
+ snake: "foo_bar",
+ camel: "FooBar",
+ },
+ {
+ snake: "foo_bar_baz",
+ camel: "FooBarBaz",
+ },
+ {
+ snake: "Foo",
+ camel: "Foo",
+ },
+ {
+ snake: "fooBar",
+ camel: "FooBar",
+ },
+ {
+ snake: "FOO",
+ camel: "FOO",
+ },
+ {
+ snake: "FOO_BAR",
+ camel: "FOOBAR",
+ },
+ {
+ snake: "_foo_bar_",
+ camel: "FooBar",
+ },
+ {
+ snake: "___foo___bar___",
+ camel: "FooBar",
+ },
+ }
+ for _, tt := range tests {
+ c := SnakeCaseToUpperCamelCase(tt.snake)
+ if c != tt.camel {
+ t.Errorf("unexpected string; want: %v, got: %v", tt.camel, c)
+ }
+ }
+}
+
+func TestFindSpellingInconsistencies(t *testing.T) {
+ tests := []struct {
+ ids []string
+ duplicated [][]string
+ }{
+ {
+ ids: []string{"foo", "foo"},
+ duplicated: nil,
+ },
+ {
+ ids: []string{"foo", "Foo"},
+ duplicated: [][]string{{"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo", "foo", "Foo"},
+ duplicated: [][]string{{"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo_bar_baz", "FooBarBaz"},
+ duplicated: [][]string{{"FooBarBaz", "foo_bar_baz"}},
+ },
+ {
+ ids: []string{"foo", "Foo", "bar", "Bar"},
+ duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
+ },
+ {
+ ids: []string{"foo", "Foo", "bar", "Bar", "baz", "bra"},
+ duplicated: [][]string{{"Bar", "bar"}, {"Foo", "foo"}},
+ },
+ }
+ for i, tt := range tests {
+ t.Run(fmt.Sprintf("#%v", i), func(t *testing.T) {
+ duplicated := FindSpellingInconsistencies(tt.ids)
+ if len(duplicated) != len(tt.duplicated) {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated, duplicated)
+ }
+ for i, dupIDs := range duplicated {
+ if len(dupIDs) != len(tt.duplicated[i]) {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
+ }
+ for j, id := range dupIDs {
+ if id != tt.duplicated[i][j] {
+ t.Fatalf("unexpected IDs; want: %#v, got: %#v", tt.duplicated[i], dupIDs)
+ }
+ }
+ }
+ })
+ }
+}
+
+func TestLexSpec_Validate(t *testing.T) {
+ // We expect that the spelling inconsistency error will occur.
+ spec := &LexSpec{
+ Entries: []*LexEntry{
+ {
+ Modes: []LexModeName{
+ // 'Default' is the spelling inconsistency because 'default' is predefined.
+ "Default",
+ },
+ Kind: "foo",
+ Pattern: "foo",
+ },
+ },
+ }
+ err := spec.Validate()
+ if err == nil {
+ t.Fatalf("expected error didn't occur")
+ }
+}
+
func MainTest() {
@@ -363,6 +576,12 @@ func MainTest() {
{ "TestGenCharBlocksWellFormed", TestGenCharBlocksWellFormed },
{ "TestGenCharBlocksIllFormed", TestGenCharBlocksIllFormed },
{ "TestCompressor_Compress", TestCompressor_Compress },
+ { "TestValidateIdentifier", TestValidateIdentifier },
+ { "TestLexKindName_validate", TestLexKindName_validate },
+ { "TestLexModeName_validate", TestLexModeName_validate },
+ { "TestSnakeCaseToUpperCamelCase", TestSnakeCaseToUpperCamelCase },
+ { "TestFindSpellingInconsistencies", TestFindSpellingInconsistencies },
+ { "TestLexSpec_Validate", TestLexSpec_Validate },
}
deps := testdeps.TestDeps{}