diff options
author | EuAndreh <eu@euandre.org> | 2024-11-29 10:19:45 -0300 |
---|---|---|
committer | EuAndreh <eu@euandre.org> | 2024-11-29 10:19:45 -0300 |
commit | d927e9e048527d0b13d16c1ee9efbd55452f7653 (patch) | |
tree | 51dc9012af477ed873f62e06b737aedf7a8c0000 /spec/spec.go | |
parent | Absorb compressor/ code (diff) | |
download | tre-d927e9e048527d0b13d16c1ee9efbd55452f7653.tar.gz tre-d927e9e048527d0b13d16c1ee9efbd55452f7653.tar.xz |
Absorb spec/
Diffstat (limited to 'spec/spec.go')
-rw-r--r-- | spec/spec.go | 382 |
1 files changed, 0 insertions, 382 deletions
diff --git a/spec/spec.go b/spec/spec.go deleted file mode 100644 index 28d5abc..0000000 --- a/spec/spec.go +++ /dev/null @@ -1,382 +0,0 @@ -package spec - -import ( - "fmt" - "regexp" - "sort" - "strconv" - "strings" -) - -// LexKindID represents an ID of a lexical kind and is unique across all modes. -type LexKindID int - -const ( - LexKindIDNil = LexKindID(0) - LexKindIDMin = LexKindID(1) -) - -func (id LexKindID) Int() int { - return int(id) -} - -// LexModeKindID represents an ID of a lexical kind and is unique within a mode. -// Use LexKindID to identify a kind across all modes uniquely. -type LexModeKindID int - -const ( - LexModeKindIDNil = LexModeKindID(0) - LexModeKindIDMin = LexModeKindID(1) -) - -func (id LexModeKindID) Int() int { - return int(id) -} - -// LexKindName represents a name of a lexical kind. -type LexKindName string - -const LexKindNameNil = LexKindName("") - -func (k LexKindName) String() string { - return string(k) -} - -func (k LexKindName) validate() error { - err := validateIdentifier(k.String()) - if err != nil { - return fmt.Errorf("invalid kind name: %v", err) - } - return nil -} - -// LexPattern represents a pattern of a lexeme. -// The pattern is written in regular expression. -type LexPattern string - -func (p LexPattern) validate() error { - if p == "" { - return fmt.Errorf("pattern doesn't allow to be the empty string") - } - return nil -} - -// LexModeID represents an ID of a lex mode. -type LexModeID int - -const ( - LexModeIDNil = LexModeID(0) - LexModeIDDefault = LexModeID(1) -) - -func (n LexModeID) String() string { - return strconv.Itoa(int(n)) -} - -func (n LexModeID) Int() int { - return int(n) -} - -func (n LexModeID) IsNil() bool { - return n == LexModeIDNil -} - -// LexModeName represents a name of a lex mode. -type LexModeName string - -const ( - LexModeNameNil = LexModeName("") - LexModeNameDefault = LexModeName("default") -) - -func (m LexModeName) String() string { - return string(m) -} - -func (m LexModeName) validate() error { - err := validateIdentifier(m.String()) - if err != nil { - return fmt.Errorf("invalid mode name: %v", err) - } - return nil -} - -const idPattern = `^[a-z](_?[0-9a-z]+)*$` - -var idRE = regexp.MustCompile(idPattern) - -func validateIdentifier(id string) error { - if id == "" { - return fmt.Errorf("identifier doesn't allow to be the empty string") - } - if !idRE.MatchString(id) { - return fmt.Errorf("identifier must be %v", idPattern) - } - return nil -} - -func SnakeCaseToUpperCamelCase(snake string) string { - elems := strings.Split(snake, "_") - for i, e := range elems { - if len(e) == 0 { - continue - } - elems[i] = strings.ToUpper(string(e[0])) + e[1:] - } - - return strings.Join(elems, "") -} - -type LexEntry struct { - Kind LexKindName `json:"kind"` - Pattern LexPattern `json:"pattern"` - Modes []LexModeName `json:"modes"` - Push LexModeName `json:"push"` - Pop bool `json:"pop"` - Fragment bool `json:"fragment"` -} - -func (e *LexEntry) validate() error { - err := e.Kind.validate() - if err != nil { - return err - } - err = e.Pattern.validate() - if err != nil { - return err - } - if len(e.Modes) > 0 { - for _, mode := range e.Modes { - err = mode.validate() - if err != nil { - return err - } - } - } - return nil -} - -type LexSpec struct { - Name string `json:"name"` - Entries []*LexEntry `json:"entries"` -} - -func (s *LexSpec) Validate() error { - err := validateIdentifier(s.Name) - if err != nil { - return fmt.Errorf("invalid specification name: %v", err) - } - - if len(s.Entries) <= 0 { - return fmt.Errorf("the lexical specification must have at least one entry") - } - { - var errs []error - for i, e := range s.Entries { - err := e.validate() - if err != nil { - errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err)) - } - } - if len(errs) > 0 { - var b strings.Builder - fmt.Fprintf(&b, "%v", errs[0]) - for _, err := range errs[1:] { - fmt.Fprintf(&b, "\n%v", err) - } - return fmt.Errorf(b.String()) - } - } - { - ks := map[string]struct{}{} - fks := map[string]struct{}{} - for _, e := range s.Entries { - // Allow duplicate names between fragments and non-fragments. - if e.Fragment { - if _, exist := fks[e.Kind.String()]; exist { - return fmt.Errorf("kinds `%v` are duplicates", e.Kind) - } - fks[e.Kind.String()] = struct{}{} - } else { - if _, exist := ks[e.Kind.String()]; exist { - return fmt.Errorf("kinds `%v` are duplicates", e.Kind) - } - ks[e.Kind.String()] = struct{}{} - } - } - } - { - kinds := []string{} - modes := []string{ - LexModeNameDefault.String(), // This is a predefined mode. - } - for _, e := range s.Entries { - if e.Fragment { - continue - } - - kinds = append(kinds, e.Kind.String()) - - for _, m := range e.Modes { - modes = append(modes, m.String()) - } - } - - kindErrs := findSpellingInconsistenciesErrors(kinds, nil) - modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error { - if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) { - var b strings.Builder - fmt.Fprintf(&b, "%+v", ids[0]) - for _, id := range ids[1:] { - fmt.Fprintf(&b, ", %+v", id) - } - return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String()) - } - return nil - }) - errs := append(kindErrs, modeErrs...) - if len(errs) > 0 { - var b strings.Builder - fmt.Fprintf(&b, "%v", errs[0]) - for _, err := range errs[1:] { - fmt.Fprintf(&b, "\n%v", err) - } - return fmt.Errorf(b.String()) - } - } - - return nil -} - -func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error { - duplicated := FindSpellingInconsistencies(ids) - if len(duplicated) == 0 { - return nil - } - - var errs []error - for _, dup := range duplicated { - if hook != nil { - err := hook(dup) - if err != nil { - errs = append(errs, err) - continue - } - } - - var b strings.Builder - fmt.Fprintf(&b, "%+v", dup[0]) - for _, id := range dup[1:] { - fmt.Fprintf(&b, ", %+v", id) - } - err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String()) - errs = append(errs, err) - } - - return errs -} - -// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same -// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same -// in UpperCamelCase. Thus they are considere to be spelling inconsistency. -func FindSpellingInconsistencies(ids []string) [][]string { - m := map[string][]string{} - for _, id := range removeDuplicates(ids) { - c := SnakeCaseToUpperCamelCase(id) - m[c] = append(m[c], id) - } - - var duplicated [][]string - for _, camels := range m { - if len(camels) == 1 { - continue - } - duplicated = append(duplicated, camels) - } - - for _, dup := range duplicated { - sort.Slice(dup, func(i, j int) bool { - return dup[i] < dup[j] - }) - } - sort.Slice(duplicated, func(i, j int) bool { - return duplicated[i][0] < duplicated[j][0] - }) - - return duplicated -} - -func removeDuplicates(s []string) []string { - m := map[string]struct{}{} - for _, v := range s { - m[v] = struct{}{} - } - - var unique []string - for v := range m { - unique = append(unique, v) - } - - return unique -} - -// StateID represents an ID of a state of a transition table. -type StateID int - -const ( - // StateIDNil represents an empty entry of a transition table. - // When the driver reads this value, it raises an error meaning lexical analysis failed. - StateIDNil = StateID(0) - - // StateIDMin is the minimum value of the state ID. All valid state IDs are represented as - // sequential numbers starting from this value. - StateIDMin = StateID(1) -) - -func (id StateID) Int() int { - return int(id) -} - -type RowDisplacementTable struct { - OriginalRowCount int `json:"original_row_count"` - OriginalColCount int `json:"original_col_count"` - EmptyValue StateID `json:"empty_value"` - Entries []StateID `json:"entries"` - Bounds []int `json:"bounds"` - RowDisplacement []int `json:"row_displacement"` -} - -type UniqueEntriesTable struct { - UniqueEntries *RowDisplacementTable `json:"unique_entries,omitempty"` - UncompressedUniqueEntries []StateID `json:"uncompressed_unique_entries,omitempty"` - RowNums []int `json:"row_nums"` - OriginalRowCount int `json:"original_row_count"` - OriginalColCount int `json:"original_col_count"` - EmptyValue int `json:"empty_value"` -} - -type TransitionTable struct { - InitialStateID StateID `json:"initial_state_id"` - AcceptingStates []LexModeKindID `json:"accepting_states"` - RowCount int `json:"row_count"` - ColCount int `json:"col_count"` - Transition *UniqueEntriesTable `json:"transition,omitempty"` - UncompressedTransition []StateID `json:"uncompressed_transition,omitempty"` -} - -type CompiledLexModeSpec struct { - KindNames []LexKindName `json:"kind_names"` - Push []LexModeID `json:"push"` - Pop []int `json:"pop"` - DFA *TransitionTable `json:"dfa"` -} - -type CompiledLexSpec struct { - Name string `json:"name"` - InitialModeID LexModeID `json:"initial_mode_id"` - ModeNames []LexModeName `json:"mode_names"` - KindNames []LexKindName `json:"kind_names"` - KindIDs [][]LexKindID `json:"kind_ids"` - CompressionLevel int `json:"compression_level"` - Specs []*CompiledLexModeSpec `json:"specs"` -} |