package lexical import ( "fmt" "sort" "strings" spec "spec/grammar" ) type LexEntry struct { Kind spec.LexKindName Pattern string Modes []spec.LexModeName Push spec.LexModeName Pop bool Fragment bool } type LexSpec struct { Entries []*LexEntry } func (s *LexSpec) Validate() error { if len(s.Entries) <= 0 { return fmt.Errorf("the lexical specification must have at least one entry") } { ks := map[string]struct{}{} fks := map[string]struct{}{} for _, e := range s.Entries { // Allow duplicate names between fragments and non-fragments. if e.Fragment { if _, exist := fks[e.Kind.String()]; exist { return fmt.Errorf("kinds `%v` are duplicates", e.Kind) } fks[e.Kind.String()] = struct{}{} } else { if _, exist := ks[e.Kind.String()]; exist { return fmt.Errorf("kinds `%v` are duplicates", e.Kind) } ks[e.Kind.String()] = struct{}{} } } } { kinds := []string{} modes := []string{ spec.LexModeNameDefault.String(), // This is a predefined mode. } for _, e := range s.Entries { if e.Fragment { continue } kinds = append(kinds, e.Kind.String()) for _, m := range e.Modes { modes = append(modes, m.String()) } } kindErrs := findSpellingInconsistenciesErrors(kinds, nil) modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error { if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(spec.LexModeNameDefault.String()) { var b strings.Builder fmt.Fprintf(&b, "%+v", ids[0]) for _, id := range ids[1:] { fmt.Fprintf(&b, ", %+v", id) } return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", spec.LexModeNameDefault, b.String()) } return nil }) errs := append(kindErrs, modeErrs...) if len(errs) > 0 { var b strings.Builder fmt.Fprintf(&b, "%v", errs[0]) for _, err := range errs[1:] { fmt.Fprintf(&b, "\n%v", err) } return fmt.Errorf(b.String()) } } return nil } func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error { duplicated := FindSpellingInconsistencies(ids) if len(duplicated) == 0 { return nil } var errs []error for _, dup := range duplicated { if hook != nil { err := hook(dup) if err != nil { errs = append(errs, err) continue } } var b strings.Builder fmt.Fprintf(&b, "%+v", dup[0]) for _, id := range dup[1:] { fmt.Fprintf(&b, ", %+v", id) } err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String()) errs = append(errs, err) } return errs } // FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same // if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same // in UpperCamelCase. Thus they are considere to be spelling inconsistency. func FindSpellingInconsistencies(ids []string) [][]string { m := map[string][]string{} for _, id := range removeDuplicates(ids) { c := SnakeCaseToUpperCamelCase(id) m[c] = append(m[c], id) } var duplicated [][]string for _, camels := range m { if len(camels) == 1 { continue } duplicated = append(duplicated, camels) } for _, dup := range duplicated { sort.Slice(dup, func(i, j int) bool { return dup[i] < dup[j] }) } sort.Slice(duplicated, func(i, j int) bool { return duplicated[i][0] < duplicated[j][0] }) return duplicated } func removeDuplicates(s []string) []string { m := map[string]struct{}{} for _, v := range s { m[v] = struct{}{} } var unique []string for v := range m { unique = append(unique, v) } return unique } func SnakeCaseToUpperCamelCase(snake string) string { elems := strings.Split(snake, "_") for i, e := range elems { if len(e) == 0 { continue } elems[i] = strings.ToUpper(string(e[0])) + e[1:] } return strings.Join(elems, "") }