package spec import ( "fmt" "regexp" "sort" "strconv" "strings" ) // LexKindID represents an ID of a lexical kind and is unique across all modes. type LexKindID int const ( LexKindIDNil = LexKindID(0) LexKindIDMin = LexKindID(1) ) func (id LexKindID) Int() int { return int(id) } // LexModeKindID represents an ID of a lexical kind and is unique within a mode. // Use LexKindID to identify a kind across all modes uniquely. type LexModeKindID int const ( LexModeKindIDNil = LexModeKindID(0) LexModeKindIDMin = LexModeKindID(1) ) func (id LexModeKindID) Int() int { return int(id) } // LexKindName represents a name of a lexical kind. type LexKindName string const LexKindNameNil = LexKindName("") func (k LexKindName) String() string { return string(k) } func (k LexKindName) validate() error { err := validateIdentifier(k.String()) if err != nil { return fmt.Errorf("invalid kind name: %v", err) } return nil } // LexPattern represents a pattern of a lexeme. // The pattern is written in regular expression. type LexPattern string func (p LexPattern) validate() error { if p == "" { return fmt.Errorf("pattern doesn't allow to be the empty string") } return nil } // LexModeID represents an ID of a lex mode. type LexModeID int const ( LexModeIDNil = LexModeID(0) LexModeIDDefault = LexModeID(1) ) func (n LexModeID) String() string { return strconv.Itoa(int(n)) } func (n LexModeID) Int() int { return int(n) } func (n LexModeID) IsNil() bool { return n == LexModeIDNil } // LexModeName represents a name of a lex mode. type LexModeName string const ( LexModeNameNil = LexModeName("") LexModeNameDefault = LexModeName("default") ) func (m LexModeName) String() string { return string(m) } func (m LexModeName) validate() error { err := validateIdentifier(m.String()) if err != nil { return fmt.Errorf("invalid mode name: %v", err) } return nil } const idPattern = `^[a-z](_?[0-9a-z]+)*$` var idRE = regexp.MustCompile(idPattern) func validateIdentifier(id string) error { if id == "" { return fmt.Errorf("identifier doesn't allow to be the empty string") } if !idRE.MatchString(id) { return fmt.Errorf("identifier must be %v", idPattern) } return nil } func SnakeCaseToUpperCamelCase(snake string) string { elems := strings.Split(snake, "_") for i, e := range elems { if len(e) == 0 { continue } elems[i] = strings.ToUpper(string(e[0])) + e[1:] } return strings.Join(elems, "") } type LexEntry struct { Kind LexKindName `json:"kind"` Pattern LexPattern `json:"pattern"` Modes []LexModeName `json:"modes"` Push LexModeName `json:"push"` Pop bool `json:"pop"` Fragment bool `json:"fragment"` } func (e *LexEntry) validate() error { err := e.Kind.validate() if err != nil { return err } err = e.Pattern.validate() if err != nil { return err } if len(e.Modes) > 0 { for _, mode := range e.Modes { err = mode.validate() if err != nil { return err } } } return nil } type LexSpec struct { Name string `json:"name"` Entries []*LexEntry `json:"entries"` } func (s *LexSpec) Validate() error { err := validateIdentifier(s.Name) if err != nil { return fmt.Errorf("invalid specification name: %v", err) } if len(s.Entries) <= 0 { return fmt.Errorf("the lexical specification must have at least one entry") } { var errs []error for i, e := range s.Entries { err := e.validate() if err != nil { errs = append(errs, fmt.Errorf("entry #%v: %w", i+1, err)) } } if len(errs) > 0 { var b strings.Builder fmt.Fprintf(&b, "%v", errs[0]) for _, err := range errs[1:] { fmt.Fprintf(&b, "\n%v", err) } return fmt.Errorf(b.String()) } } { ks := map[string]struct{}{} fks := map[string]struct{}{} for _, e := range s.Entries { // Allow duplicate names between fragments and non-fragments. if e.Fragment { if _, exist := fks[e.Kind.String()]; exist { return fmt.Errorf("kinds `%v` are duplicates", e.Kind) } fks[e.Kind.String()] = struct{}{} } else { if _, exist := ks[e.Kind.String()]; exist { return fmt.Errorf("kinds `%v` are duplicates", e.Kind) } ks[e.Kind.String()] = struct{}{} } } } { kinds := []string{} modes := []string{ LexModeNameDefault.String(), // This is a predefined mode. } for _, e := range s.Entries { if e.Fragment { continue } kinds = append(kinds, e.Kind.String()) for _, m := range e.Modes { modes = append(modes, m.String()) } } kindErrs := findSpellingInconsistenciesErrors(kinds, nil) modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error { if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) { var b strings.Builder fmt.Fprintf(&b, "%+v", ids[0]) for _, id := range ids[1:] { fmt.Fprintf(&b, ", %+v", id) } return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String()) } return nil }) errs := append(kindErrs, modeErrs...) if len(errs) > 0 { var b strings.Builder fmt.Fprintf(&b, "%v", errs[0]) for _, err := range errs[1:] { fmt.Fprintf(&b, "\n%v", err) } return fmt.Errorf(b.String()) } } return nil } func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error { duplicated := FindSpellingInconsistencies(ids) if len(duplicated) == 0 { return nil } var errs []error for _, dup := range duplicated { err := hook(dup) if err != nil { errs = append(errs, err) continue } var b strings.Builder fmt.Fprintf(&b, "%+v", dup[0]) for _, id := range dup[1:] { fmt.Fprintf(&b, ", %+v", id) } err = fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String()) errs = append(errs, err) } return errs } // FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same // if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same // in UpperCamelCase. Thus they are considere to be spelling inconsistency. func FindSpellingInconsistencies(ids []string) [][]string { m := map[string][]string{} for _, id := range removeDuplicates(ids) { c := SnakeCaseToUpperCamelCase(id) m[c] = append(m[c], id) } var duplicated [][]string for _, camels := range m { if len(camels) == 1 { continue } duplicated = append(duplicated, camels) } for _, dup := range duplicated { sort.Slice(dup, func(i, j int) bool { return dup[i] < dup[j] }) } sort.Slice(duplicated, func(i, j int) bool { return duplicated[i][0] < duplicated[j][0] }) return duplicated } func removeDuplicates(s []string) []string { m := map[string]struct{}{} for _, v := range s { m[v] = struct{}{} } var unique []string for v := range m { unique = append(unique, v) } return unique } // StateID represents an ID of a state of a transition table. type StateID int const ( // StateIDNil represents an empty entry of a transition table. // When the driver reads this value, it raises an error meaning lexical analysis failed. StateIDNil = StateID(0) // StateIDMin is the minimum value of the state ID. All valid state IDs are represented as // sequential numbers starting from this value. StateIDMin = StateID(1) ) func (id StateID) Int() int { return int(id) } type RowDisplacementTable struct { OriginalRowCount int `json:"original_row_count"` OriginalColCount int `json:"original_col_count"` EmptyValue StateID `json:"empty_value"` Entries []StateID `json:"entries"` Bounds []int `json:"bounds"` RowDisplacement []int `json:"row_displacement"` } type UniqueEntriesTable struct { UniqueEntries *RowDisplacementTable `json:"unique_entries,omitempty"` UncompressedUniqueEntries []StateID `json:"uncompressed_unique_entries,omitempty"` RowNums []int `json:"row_nums"` OriginalRowCount int `json:"original_row_count"` OriginalColCount int `json:"original_col_count"` EmptyValue int `json:"empty_value"` } type TransitionTable struct { InitialStateID StateID `json:"initial_state_id"` AcceptingStates []LexModeKindID `json:"accepting_states"` RowCount int `json:"row_count"` ColCount int `json:"col_count"` Transition *UniqueEntriesTable `json:"transition,omitempty"` UncompressedTransition []StateID `json:"uncompressed_transition,omitempty"` } type CompiledLexModeSpec struct { KindNames []LexKindName `json:"kind_names"` Push []LexModeID `json:"push"` Pop []int `json:"pop"` DFA *TransitionTable `json:"dfa"` } type CompiledLexSpec struct { Name string `json:"name"` InitialModeID LexModeID `json:"initial_mode_id"` ModeNames []LexModeName `json:"mode_names"` KindNames []LexKindName `json:"kind_names"` KindIDs [][]LexKindID `json:"kind_ids"` CompressionLevel int `json:"compression_level"` Specs []*CompiledLexModeSpec `json:"specs"` }