Absorb spec/

author: EuAndreh <eu@euandre.org> 2024-11-29 10:19:45 -0300
committer: EuAndreh <eu@euandre.org> 2024-11-29 10:19:45 -0300
commit: d927e9e048527d0b13d16c1ee9efbd55452f7653 (patch)
tree: 51dc9012af477ed873f62e06b737aedf7a8c0000 /spec/spec.go
parent: Absorb compressor/ code (diff)
download: tre-d927e9e048527d0b13d16c1ee9efbd55452f7653.tar.gz
tre-d927e9e048527d0b13d16c1ee9efbd55452f7653.tar.xz
1 files changed, 0 insertions, 382 deletions
diff --git a/spec/spec.go b/spec/spec.go
deleted file mode 100644
index 28d5abc..0000000
--- a/spec/spec.go
+++ /dev/null
@@ -1,382 +0,0 @@
-package spec
-
-import (
-	"fmt"
-	"regexp"
-	"sort"
-	"strconv"
-	"strings"
-)
-
-// LexKindID represents an ID of a lexical kind and is unique across all modes.
-type LexKindID int
-
-const (
-	LexKindIDNil = LexKindID(0)
-	LexKindIDMin = LexKindID(1)
-)
-
-func (id LexKindID) Int() int {
-	return int(id)
-}
-
-// LexModeKindID represents an ID of a lexical kind and is unique within a mode.
-// Use LexKindID to identify a kind across all modes uniquely.
-type LexModeKindID int
-
-const (
-	LexModeKindIDNil = LexModeKindID(0)
-	LexModeKindIDMin = LexModeKindID(1)
-)
-
-func (id LexModeKindID) Int() int {
-	return int(id)
-}
-
-// LexKindName represents a name of a lexical kind.
-type LexKindName string
-
-const LexKindNameNil = LexKindName("")
-
-func (k LexKindName) String() string {
-	return string(k)
-}
-
-func (k LexKindName) validate() error {
-	err := validateIdentifier(k.String())
-	if err != nil {
-		return fmt.Errorf("invalid kind name: %v", err)
-	}
-	return nil
-}
-
-// LexPattern represents a pattern of a lexeme.
-// The pattern is written in regular expression.
-type LexPattern string
-
-func (p LexPattern) validate() error {
-	if p == "" {
-		return fmt.Errorf("pattern doesn't allow to be the empty string")
-	}
-	return nil
-}
-
-// LexModeID represents an ID of a lex mode.
-type LexModeID int
-
-const (
-	LexModeIDNil     = LexModeID(0)
-	LexModeIDDefault = LexModeID(1)
-)
-
-func (n LexModeID) String() string {
-	return strconv.Itoa(int(n))
-}
-
-func (n LexModeID) Int() int {
-	return int(n)
-}
-
-func (n LexModeID) IsNil() bool {
-	return n == LexModeIDNil
-}
-
-// LexModeName represents a name of a lex mode.
-type LexModeName string
-
-const (
-	LexModeNameNil     = LexModeName("")
-	LexModeNameDefault = LexModeName("default")
-)
-
-func (m LexModeName) String() string {
-	return string(m)
-}
-
-func (m LexModeName) validate() error {
-	err := validateIdentifier(m.String())
-	if err != nil {
-		return fmt.Errorf("invalid mode name: %v", err)
-	}
-	return nil
-}
-
-const idPattern = `^[a-z](_?[0-9a-z]+)*$`
-
-var idRE = regexp.MustCompile(idPattern)
-
-func validateIdentifier(id string) error {
-	if id == "" {
-		return fmt.Errorf("identifier doesn't allow to be the empty string")
-	}
-	if !idRE.MatchString(id) {
-		return fmt.Errorf("identifier must be %v", idPattern)
-	}
-	return nil
-}
-
-func SnakeCaseToUpperCamelCase(snake string) string {
-	elems := strings.Split(snake, "_")
-	for i, e := range elems {
-		if len(e) == 0 {
-			continue
-		}
-		elems[i] = strings.ToUpper(string(e[0])) + e[1:]
-	}
-
-	return strings.Join(elems, "")
-}
-
-type LexEntry struct {
-	Kind     LexKindName   `json:"kind"`
-	Pattern  LexPattern    `json:"pattern"`
-	Modes    []LexModeName `json:"modes"`
-	Push     LexModeName   `json:"push"`
-	Pop      bool          `json:"pop"`
-	Fragment bool          `json:"fragment"`
-}
-
-func (e *LexEntry) validate() error {
-	err := e.Kind.validate()
-	if err != nil {
-		return err
-	}
-	err = e.Pattern.validate()
-	if err != nil {
-		return err
-	}
-	if len(e.Modes) > 0 {
-		for _, mode := range e.Modes {
-			err = mode.validate()
-			if err != nil {
-				return err
-			}
-		}
-	}
-	return nil
-}
-
-type LexSpec struct {
-	Name    string      `json:"name"`
-	Entries []*LexEntry `json:"entries"`
-}
-
-func (s *LexSpec) Validate() error {
-	err := validateIdentifier(s.Name)
-	if err != nil {
-		return fmt.Errorf("invalid specification name: %v", err)
-	}
-
-	if len(s.Entries) <= 0 {
-		return fmt.Errorf("the lexical specification must have at least one entry")
-	}
-	{
-		var errs []error
-		for i, e := range s.Entries {
-			err := e.validate()
-			if err != nil {
-				errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err))
-			}
-		}
-		if len(errs) > 0 {
-			var b strings.Builder
-			fmt.Fprintf(&b, "%v", errs[0])
-			for _, err := range errs[1:] {
-				fmt.Fprintf(&b, "\n%v", err)
-			}
-			return fmt.Errorf(b.String())
-		}
-	}
-	{
-		ks := map[string]struct{}{}
-		fks := map[string]struct{}{}
-		for _, e := range s.Entries {
-			// Allow duplicate names between fragments and non-fragments.
-			if e.Fragment {
-				if _, exist := fks[e.Kind.String()]; exist {
-					return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
-				}
-				fks[e.Kind.String()] = struct{}{}
-			} else {
-				if _, exist := ks[e.Kind.String()]; exist {
-					return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
-				}
-				ks[e.Kind.String()] = struct{}{}
-			}
-		}
-	}
-	{
-		kinds := []string{}
-		modes := []string{
-			LexModeNameDefault.String(), // This is a predefined mode.
-		}
-		for _, e := range s.Entries {
-			if e.Fragment {
-				continue
-			}
-
-			kinds = append(kinds, e.Kind.String())
-
-			for _, m := range e.Modes {
-				modes = append(modes, m.String())
-			}
-		}
-
-		kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
-		modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
-			if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) {
-				var b strings.Builder
-				fmt.Fprintf(&b, "%+v", ids[0])
-				for _, id := range ids[1:] {
-					fmt.Fprintf(&b, ", %+v", id)
-				}
-				return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String())
-			}
-			return nil
-		})
-		errs := append(kindErrs, modeErrs...)
-		if len(errs) > 0 {
-			var b strings.Builder
-			fmt.Fprintf(&b, "%v", errs[0])
-			for _, err := range errs[1:] {
-				fmt.Fprintf(&b, "\n%v", err)
-			}
-			return fmt.Errorf(b.String())
-		}
-	}
-
-	return nil
-}
-
-func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
-	duplicated := FindSpellingInconsistencies(ids)
-	if len(duplicated) == 0 {
-		return nil
-	}
-
-	var errs []error
-	for _, dup := range duplicated {
-		if hook != nil {
-			err := hook(dup)
-			if err != nil {
-				errs = append(errs, err)
-				continue
-			}
-		}
-
-		var b strings.Builder
-		fmt.Fprintf(&b, "%+v", dup[0])
-		for _, id := range dup[1:] {
-			fmt.Fprintf(&b, ", %+v", id)
-		}
-		err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
-		errs = append(errs, err)
-	}
-
-	return errs
-}
-
-// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
-// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
-// in UpperCamelCase. Thus they are considere to be spelling inconsistency.
-func FindSpellingInconsistencies(ids []string) [][]string {
-	m := map[string][]string{}
-	for _, id := range removeDuplicates(ids) {
-		c := SnakeCaseToUpperCamelCase(id)
-		m[c] = append(m[c], id)
-	}
-
-	var duplicated [][]string
-	for _, camels := range m {
-		if len(camels) == 1 {
-			continue
-		}
-		duplicated = append(duplicated, camels)
-	}
-
-	for _, dup := range duplicated {
-		sort.Slice(dup, func(i, j int) bool {
-			return dup[i] < dup[j]
-		})
-	}
-	sort.Slice(duplicated, func(i, j int) bool {
-		return duplicated[i][0] < duplicated[j][0]
-	})
-
-	return duplicated
-}
-
-func removeDuplicates(s []string) []string {
-	m := map[string]struct{}{}
-	for _, v := range s {
-		m[v] = struct{}{}
-	}
-
-	var unique []string
-	for v := range m {
-		unique = append(unique, v)
-	}
-
-	return unique
-}
-
-// StateID represents an ID of a state of a transition table.
-type StateID int
-
-const (
-	// StateIDNil represents an empty entry of a transition table.
-	// When the driver reads this value, it raises an error meaning lexical analysis failed.
-	StateIDNil = StateID(0)
-
-	// StateIDMin is the minimum value of the state ID. All valid state IDs are represented as
-	// sequential numbers starting from this value.
-	StateIDMin = StateID(1)
-)
-
-func (id StateID) Int() int {
-	return int(id)
-}
-
-type RowDisplacementTable struct {
-	OriginalRowCount int       `json:"original_row_count"`
-	OriginalColCount int       `json:"original_col_count"`
-	EmptyValue       StateID   `json:"empty_value"`
-	Entries          []StateID `json:"entries"`
-	Bounds           []int     `json:"bounds"`
-	RowDisplacement  []int     `json:"row_displacement"`
-}
-
-type UniqueEntriesTable struct {
-	UniqueEntries             *RowDisplacementTable `json:"unique_entries,omitempty"`
-	UncompressedUniqueEntries []StateID             `json:"uncompressed_unique_entries,omitempty"`
-	RowNums                   []int                 `json:"row_nums"`
-	OriginalRowCount          int                   `json:"original_row_count"`
-	OriginalColCount          int                   `json:"original_col_count"`
-	EmptyValue                int                   `json:"empty_value"`
-}
-
-type TransitionTable struct {
-	InitialStateID         StateID             `json:"initial_state_id"`
-	AcceptingStates        []LexModeKindID     `json:"accepting_states"`
-	RowCount               int                 `json:"row_count"`
-	ColCount               int                 `json:"col_count"`
-	Transition             *UniqueEntriesTable `json:"transition,omitempty"`
-	UncompressedTransition []StateID           `json:"uncompressed_transition,omitempty"`
-}
-
-type CompiledLexModeSpec struct {
-	KindNames []LexKindName    `json:"kind_names"`
-	Push      []LexModeID      `json:"push"`
-	Pop       []int            `json:"pop"`
-	DFA       *TransitionTable `json:"dfa"`
-}
-
-type CompiledLexSpec struct {
-	Name             string                 `json:"name"`
-	InitialModeID    LexModeID              `json:"initial_mode_id"`
-	ModeNames        []LexModeName          `json:"mode_names"`
-	KindNames        []LexKindName          `json:"kind_names"`
-	KindIDs          [][]LexKindID          `json:"kind_ids"`
-	CompressionLevel int                    `json:"compression_level"`
-	Specs            []*CompiledLexModeSpec `json:"specs"`
-}
author	EuAndreh <eu@euandre.org>	2024-11-29 10:19:45 -0300
committer	EuAndreh <eu@euandre.org>	2024-11-29 10:19:45 -0300
commit	d927e9e048527d0b13d16c1ee9efbd55452f7653 (patch)
tree	51dc9012af477ed873f62e06b737aedf7a8c0000 /spec/spec.go
parent	Absorb compressor/ code (diff)
download	tre-d927e9e048527d0b13d16c1ee9efbd55452f7653.tar.gz tre-d927e9e048527d0b13d16c1ee9efbd55452f7653.tar.xz