diff options
Diffstat (limited to 'spec/spec.go')
-rw-r--r-- | spec/spec.go | 154 |
1 files changed, 139 insertions, 15 deletions
diff --git a/spec/spec.go b/spec/spec.go index 9ac5f4b..62acfc4 100644 --- a/spec/spec.go +++ b/spec/spec.go @@ -3,6 +3,7 @@ package spec import ( "fmt" "regexp" + "sort" "strconv" "strings" ) @@ -42,19 +43,13 @@ func (k LexKindName) String() string { } func (k LexKindName) validate() error { - if k == "" { - return fmt.Errorf("kind doesn't allow to be the empty string") - } - if !lexKindNameRE.Match([]byte(k)) { - return fmt.Errorf("kind must be %v", lexKindNamePattern) + err := validateIdentifier(k.String()) + if err != nil { + return fmt.Errorf("invalid kind name: %v", err) } return nil } -const lexKindNamePattern = "[A-Za-z_][0-9A-Za-z_]*" - -var lexKindNameRE = regexp.MustCompile(lexKindNamePattern) - // LexPattern represents a pattern of a lexeme. // The pattern is written in regular expression. type LexPattern string @@ -99,19 +94,38 @@ func (m LexModeName) String() string { } func (m LexModeName) validate() error { - if m.isNil() || !lexModeNameRE.Match([]byte(m)) { - return fmt.Errorf("mode must be %v", lexModeNamePattern) + err := validateIdentifier(m.String()) + if err != nil { + return fmt.Errorf("invalid mode name: %v", err) } return nil } -func (m LexModeName) isNil() bool { - return m == LexModeNameNil +const idPattern = `^[A-Za-z](_?[0-9A-Za-z]+)*$` + +var idRE = regexp.MustCompile(idPattern) + +func validateIdentifier(id string) error { + if id == "" { + return fmt.Errorf("identifier doesn't allow to be the empty string") + } + if !idRE.MatchString(id) { + return fmt.Errorf("identifier must be %v", idPattern) + } + return nil } -const lexModeNamePattern = "[A-Za-z_][0-9A-Za-z_]*" +func SnakeCaseToUpperCamelCase(snake string) string { + elems := strings.Split(snake, "_") + for i, e := range elems { + if len(e) == 0 { + continue + } + elems[i] = strings.ToUpper(string(e[0])) + e[1:] + } -var lexModeNameRE = regexp.MustCompile(lexModeNamePattern) + return strings.Join(elems, "") +} type LexEntry struct { Kind LexKindName `json:"kind"` @@ -185,9 +199,119 @@ func (s *LexSpec) Validate() error { } } } + { + kinds := []string{} + modes := []string{ + LexModeNameDefault.String(), // This is a predefined mode. + } + for _, e := range s.Entries { + if e.Fragment { + continue + } + + kinds = append(kinds, e.Kind.String()) + + for _, m := range e.Modes { + modes = append(modes, m.String()) + } + } + + kindErrs := findSpellingInconsistenciesErrors(kinds, nil) + modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error { + if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(LexModeNameDefault.String()) { + var b strings.Builder + fmt.Fprintf(&b, "%+v", ids[0]) + for _, id := range ids[1:] { + fmt.Fprintf(&b, ", %+v", id) + } + return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", LexModeNameDefault, b.String()) + } + return nil + }) + errs := append(kindErrs, modeErrs...) + if len(errs) > 0 { + var b strings.Builder + fmt.Fprintf(&b, "%v", errs[0]) + for _, err := range errs[1:] { + fmt.Fprintf(&b, "\n%v", err) + } + return fmt.Errorf(b.String()) + } + } + return nil } +func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error { + duplicated := FindSpellingInconsistencies(ids) + if len(duplicated) == 0 { + return nil + } + + var errs []error + for _, dup := range duplicated { + err := hook(dup) + if err != nil { + errs = append(errs, err) + continue + } + + var b strings.Builder + fmt.Fprintf(&b, "%+v", dup[0]) + for _, id := range dup[1:] { + fmt.Fprintf(&b, ", %+v", id) + } + err = fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String()) + errs = append(errs, err) + } + + return errs +} + +// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same +// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same +// in UpperCamelCase. Thus they are considere to be spelling inconsistency. +func FindSpellingInconsistencies(ids []string) [][]string { + m := map[string][]string{} + for _, id := range removeDuplicates(ids) { + c := SnakeCaseToUpperCamelCase(id) + m[c] = append(m[c], id) + } + + var duplicated [][]string + for _, camels := range m { + if len(camels) == 1 { + continue + } + duplicated = append(duplicated, camels) + } + + for _, dup := range duplicated { + sort.Slice(dup, func(i, j int) bool { + return dup[i] < dup[j] + }) + } + sort.Slice(duplicated, func(i, j int) bool { + return duplicated[i][0] < duplicated[j][0] + }) + + return duplicated +} + +func removeDuplicates(s []string) []string { + m := map[string]struct{}{} + for _, v := range s { + m[v] = struct{}{} + } + + var unique []string + for v := range m { + unique = append(unique, v) + } + + return unique +} + // StateID represents an ID of a state of a transition table. type StateID int |