package lexical

import (
	"bytes"
	"fmt"
	"sort"
	"strings"

	"urubu/compressor"
	"urubu/grammar/lexical/dfa"
	psr "urubu/grammar/lexical/parser"
	spec "urubu/spec/grammar"
)

// CompileError describes a compilation failure of a single lexical kind
// (either a regular kind or a fragment).
type CompileError struct {
	Kind     spec.LexKindName
	Fragment bool
	Cause    error
	Detail   string
}

// Compile validates lexspec and compiles it into a lexical specification.
// compLv selects the transition-table compression level
// (CompressionLevelMin..CompressionLevelMax). On failure it returns a
// summary error and, when available, per-kind compile errors.
func Compile(lexspec *LexSpec, compLv int) (*spec.LexicalSpec, error, []*CompileError) {
	err := lexspec.Validate()
	if err != nil {
		return nil, fmt.Errorf("invalid lexical specification:\n%w", err), nil
	}

	modeEntries, modeNames, modeName2ID, fragments := groupEntriesByLexMode(lexspec.Entries)

	// Index 0 is reserved for the nil mode, so the slice starts with a
	// nil placeholder and real modes are compiled from index 1 onward.
	modeSpecs := []*spec.CompiledLexModeSpec{
		nil,
	}
	for i, es := range modeEntries[1:] {
		modeName := modeNames[i+1]
		modeSpec, err, cerrs := compile(es, modeName2ID, fragments, compLv)
		if err != nil {
			return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err), cerrs
		}
		modeSpecs = append(modeSpecs, modeSpec)
	}

	// Assign a globally unique ID to each kind name. A kind appearing in
	// multiple modes keeps a single ID.
	var kindNames []spec.LexKindName
	var name2ID map[spec.LexKindName]spec.LexKindID
	{
		name2ID = map[spec.LexKindName]spec.LexKindID{}
		id := spec.LexKindIDMin
		for _, modeSpec := range modeSpecs[1:] {
			for _, name := range modeSpec.KindNames[1:] {
				if _, ok := name2ID[name]; ok {
					continue
				}
				name2ID[name] = id
				id++
			}
		}

		// Slot 0 stays the zero value: it corresponds to the nil kind.
		kindNames = make([]spec.LexKindName, len(name2ID)+1)
		for name, id := range name2ID {
			kindNames[id] = name
		}
	}

	// For each mode, map mode-local kind IDs to the global kind IDs
	// assigned above.
	var kindIDs [][]spec.LexKindID
	{
		kindIDs = make([][]spec.LexKindID, len(modeSpecs))
		for i, modeSpec := range modeSpecs[1:] {
			ids := make([]spec.LexKindID, len(modeSpec.KindNames))
			for modeID, name := range modeSpec.KindNames {
				if modeID == 0 {
					// Mode-local ID 0 is the nil kind.
					continue
				}
				ids[modeID] = name2ID[name]
			}
			kindIDs[i+1] = ids
		}
	}

	return &spec.LexicalSpec{
		InitialModeID:    spec.LexModeIDDefault,
		ModeNames:        modeNames,
		KindNames:        kindNames,
		KindIDs:          kindIDs,
		CompressionLevel: compLv,
		Specs:            modeSpecs,
	}, nil, nil
}

// groupEntriesByLexMode splits entries into per-mode entry lists, registering
// new modes in order of first appearance. It returns the per-mode entries
// (index 0 reserved for the nil mode), the mode names, the name-to-ID
// mapping, and the fragment entries keyed by kind name. An entry with no
// explicit modes is assigned to the default mode.
func groupEntriesByLexMode(entries []*LexEntry) ([][]*LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[spec.LexKindName]*LexEntry) {
	modeNames := []spec.LexModeName{
		spec.LexModeNameNil,
		spec.LexModeNameDefault,
	}
	modeName2ID := map[spec.LexModeName]spec.LexModeID{
		spec.LexModeNameNil:     spec.LexModeIDNil,
		spec.LexModeNameDefault: spec.LexModeIDDefault,
	}
	lastModeID := spec.LexModeIDDefault
	modeEntries := [][]*LexEntry{
		nil,
		{},
	}
	fragments := map[spec.LexKindName]*LexEntry{}
	for _, e := range entries {
		if e.Fragment {
			// Fragments belong to no mode; they are collected separately
			// and substituted into patterns later.
			fragments[e.Kind] = e
			continue
		}
		ms := e.Modes
		if len(ms) == 0 {
			ms = []spec.LexModeName{
				spec.LexModeNameDefault,
			}
		}
		for _, modeName := range ms {
			modeID, ok := modeName2ID[modeName]
			if !ok {
				modeID = lastModeID + 1
				lastModeID = modeID
				modeName2ID[modeName] = modeID
				modeNames = append(modeNames, modeName)
				modeEntries = append(modeEntries, []*LexEntry{})
			}
			modeEntries[modeID] = append(modeEntries[modeID], e)
		}
	}
	return modeEntries, modeNames, modeName2ID, fragments
}

// compile compiles the entries of a single lex mode: it parses every pattern,
// resolves fragment references, generates a DFA, and optionally compresses
// its transition table. Per-kind parse/resolution failures are accumulated
// and returned as []*CompileError alongside a generic "compile error".
func compile(
	entries []*LexEntry,
	modeName2ID map[spec.LexModeName]spec.LexModeID,
	fragments map[spec.LexKindName]*LexEntry,
	compLv int,
) (*spec.CompiledLexModeSpec, error, []*CompileError) {
	var kindNames []spec.LexKindName
	kindIDToName := map[spec.LexModeKindID]spec.LexKindName{}
	var patterns map[spec.LexModeKindID][]byte
	{
		// Mode-local kind ID 0 is the nil kind; real kinds start at 1.
		kindNames = append(kindNames, spec.LexKindNameNil)
		patterns = map[spec.LexModeKindID][]byte{}
		for i, e := range entries {
			kindID := spec.LexModeKindID(i + 1)

			kindNames = append(kindNames, e.Kind)
			kindIDToName[kindID] = e.Kind
			patterns[kindID] = []byte(e.Pattern)
		}
	}

	// Per-kind mode transitions: push[i] is the mode pushed when kind i is
	// matched (nil if none), pop[i] is 1 when matching kind i pops a mode.
	push := []spec.LexModeID{
		spec.LexModeIDNil,
	}
	pop := []int{
		0,
	}
	for _, e := range entries {
		pushV := spec.LexModeIDNil
		if e.Push != "" {
			pushV = modeName2ID[e.Push]
		}
		push = append(push, pushV)

		popV := 0
		if e.Pop {
			popV = 1
		}
		pop = append(pop, popV)
	}

	fragmentPatterns := map[spec.LexKindName][]byte{}
	for k, e := range fragments {
		fragmentPatterns[k] = []byte(e.Pattern)
	}

	// Parse every fragment pattern, then resolve fragment-to-fragment
	// references (detecting undefined fragments and cycles).
	fragmentCPTrees := make(map[spec.LexKindName]psr.CPTree, len(fragmentPatterns))
	{
		var cerrs []*CompileError
		for kind, pat := range fragmentPatterns {
			p := psr.NewParser(kind, bytes.NewReader(pat))
			t, err := p.Parse()
			if err != nil {
				if err == psr.ParseErr {
					// A syntax error: the parser carries the details.
					detail, cause := p.Error()
					cerrs = append(cerrs, &CompileError{
						Kind:     kind,
						Fragment: true,
						Cause:    cause,
						Detail:   detail,
					})
				} else {
					cerrs = append(cerrs, &CompileError{
						Kind:     kind,
						Fragment: true,
						Cause:    err,
					})
				}
				continue
			}
			fragmentCPTrees[kind] = t
		}
		if len(cerrs) > 0 {
			return nil, fmt.Errorf("compile error"), cerrs
		}

		err := psr.CompleteFragments(fragmentCPTrees)
		if err != nil {
			if err == psr.ParseErr {
				for _, frag := range fragmentCPTrees {
					kind, frags, err := frag.Describe()
					if err != nil {
						return nil, err, nil
					}

					cerrs = append(cerrs, &CompileError{
						Kind:     kind,
						Fragment: true,
						Cause:    fmt.Errorf("fragment contains undefined fragments or cycles"),
						Detail:   fmt.Sprintf("%v", frags),
					})
				}
				return nil, fmt.Errorf("compile error"), cerrs
			}
			return nil, err, nil
		}
	}

	// Parse every kind pattern and substitute the resolved fragments.
	cpTrees := map[spec.LexModeKindID]psr.CPTree{}
	{
		pats := make([]*psr.PatternEntry, len(patterns)+1)
		pats[spec.LexModeKindIDNil] = &psr.PatternEntry{
			ID: spec.LexModeKindIDNil,
		}
		for id, pattern := range patterns {
			pats[id] = &psr.PatternEntry{
				ID:      id,
				Pattern: pattern,
			}
		}

		var cerrs []*CompileError
		for _, pat := range pats {
			if pat.ID == spec.LexModeKindIDNil {
				continue
			}

			p := psr.NewParser(kindIDToName[pat.ID], bytes.NewReader(pat.Pattern))
			t, err := p.Parse()
			if err != nil {
				if err == psr.ParseErr {
					detail, cause := p.Error()
					cerrs = append(cerrs, &CompileError{
						Kind:     kindIDToName[pat.ID],
						Fragment: false,
						Cause:    cause,
						Detail:   detail,
					})
				} else {
					cerrs = append(cerrs, &CompileError{
						Kind:     kindIDToName[pat.ID],
						Fragment: false,
						Cause:    err,
					})
				}
				continue
			}

			complete, err := psr.ApplyFragments(t, fragmentCPTrees)
			if err != nil {
				return nil, err, nil
			}
			if !complete {
				_, frags, err := t.Describe()
				if err != nil {
					return nil, err, nil
				}

				cerrs = append(cerrs, &CompileError{
					Kind:     kindIDToName[pat.ID],
					Fragment: false,
					Cause:    fmt.Errorf("pattern contains undefined fragments"),
					Detail:   fmt.Sprintf("%v", frags),
				})
				continue
			}

			cpTrees[pat.ID] = t
		}
		if len(cerrs) > 0 {
			return nil, fmt.Errorf("compile error"), cerrs
		}
	}

	// Generate the DFA and its transition table from the parse trees.
	var tranTab *spec.TransitionTable
	{
		root, symTab, err := dfa.ConvertCPTreeToByteTree(cpTrees)
		if err != nil {
			return nil, err, nil
		}
		d := dfa.GenDFA(root, symTab)
		tranTab, err = dfa.GenTransitionTable(d)
		if err != nil {
			return nil, err, nil
		}
	}

	// Compression level 0 leaves the table uncompressed.
	var err error
	switch compLv {
	case 2:
		tranTab, err = compressTransitionTableLv2(tranTab)
		if err != nil {
			return nil, err, nil
		}
	case 1:
		tranTab, err = compressTransitionTableLv1(tranTab)
		if err != nil {
			return nil, err, nil
		}
	}

	return &spec.CompiledLexModeSpec{
		KindNames: kindNames,
		Push:      push,
		Pop:       pop,
		DFA:       tranTab,
	}, nil, nil
}

// Supported transition-table compression levels.
const (
	CompressionLevelMin = 0
	CompressionLevelMax = 2
)

// compressTransitionTableLv2 applies level-2 compression: unique-entries
// deduplication followed by row-displacement compression. The uncompressed
// table is dropped from tranTab afterwards.
func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
	ueTab := compressor.NewUniqueEntriesTable()
	{
		orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
		if err != nil {
			return nil, err
		}
		err = ueTab.Compress(orig)
		if err != nil {
			return nil, err
		}
	}

	rdTab := compressor.NewRowDisplacementTable(0)
	{
		orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount)
		if err != nil {
			return nil, err
		}
		err = rdTab.Compress(orig)
		if err != nil {
			return nil, err
		}
	}

	tranTab.Transition = &spec.UniqueEntriesTable{
		UniqueEntries: &spec.RowDisplacementTable{
			OriginalRowCount: rdTab.OriginalRowCount,
			OriginalColCount: rdTab.OriginalColCount,
			EmptyValue:       spec.StateIDNil,
			Entries:          convertIntSliceToStateIDSlice(rdTab.Entries),
			Bounds:           rdTab.Bounds,
			RowDisplacement:  rdTab.RowDisplacement,
		},
		RowNums:          ueTab.RowNums,
		OriginalRowCount: ueTab.OriginalRowCount,
		OriginalColCount: ueTab.OriginalColCount,
	}
	tranTab.UncompressedTransition = nil

	return tranTab, nil
}

// compressTransitionTableLv1 applies level-1 compression: unique-entries
// deduplication only. The uncompressed table is dropped from tranTab
// afterwards.
func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
	ueTab := compressor.NewUniqueEntriesTable()
	{
		orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
		if err != nil {
			return nil, err
		}
		err = ueTab.Compress(orig)
		if err != nil {
			return nil, err
		}
	}

	tranTab.Transition = &spec.UniqueEntriesTable{
		UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries),
		RowNums:                   ueTab.RowNums,
		OriginalRowCount:          ueTab.OriginalRowCount,
		OriginalColCount:          ueTab.OriginalColCount,
	}
	tranTab.UncompressedTransition = nil

	return tranTab, nil
}

// convertStateIDSliceToIntSlice copies s into a new []int.
func convertStateIDSliceToIntSlice(s []spec.StateID) []int {
	is := make([]int, len(s))
	for i, v := range s {
		is[i] = v.Int()
	}
	return is
}

// convertIntSliceToStateIDSlice copies s into a new []spec.StateID.
func convertIntSliceToStateIDSlice(s []int) []spec.StateID {
	ss := make([]spec.StateID, len(s))
	for i, v := range s {
		ss[i] = spec.StateID(v)
	}
	return ss
}

// LexEntry is a single rule of a lexical specification: a kind name, its
// pattern, the modes it belongs to, and its mode push/pop behavior.
// Fragment entries define reusable sub-patterns instead of tokens.
type LexEntry struct {
	Kind     spec.LexKindName
	Pattern  string
	Modes    []spec.LexModeName
	Push     spec.LexModeName
	Pop      bool
	Fragment bool
}

// LexSpec is an uncompiled lexical specification.
type LexSpec struct {
	Entries []*LexEntry
}

// Validate checks that the specification is non-empty, that kind names are
// unique (fragments and non-fragments are counted separately), and that
// identifiers are spelled consistently.
func (s *LexSpec) Validate() error {
	if len(s.Entries) <= 0 {
		return fmt.Errorf("the lexical specification must have at least one entry")
	}
	{
		ks := map[string]struct{}{}
		fks := map[string]struct{}{}
		for _, e := range s.Entries {
			// Allow duplicate names between fragments and non-fragments.
			if e.Fragment {
				if _, exist := fks[e.Kind.String()]; exist {
					return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
				}
				fks[e.Kind.String()] = struct{}{}
			} else {
				if _, exist := ks[e.Kind.String()]; exist {
					return fmt.Errorf("kinds `%v` are duplicates", e.Kind)
				}
				ks[e.Kind.String()] = struct{}{}
			}
		}
	}
	{
		kinds := []string{}
		modes := []string{
			spec.LexModeNameDefault.String(), // This is a predefined mode.
		}
		for _, e := range s.Entries {
			if e.Fragment {
				continue
			}

			kinds = append(kinds, e.Kind.String())

			for _, m := range e.Modes {
				modes = append(modes, m.String())
			}
		}

		kindErrs := findSpellingInconsistenciesErrors(kinds, nil)
		modeErrs := findSpellingInconsistenciesErrors(modes, func(ids []string) error {
			// Give a more specific message when an identifier collides
			// with the predefined default mode.
			if SnakeCaseToUpperCamelCase(ids[0]) == SnakeCaseToUpperCamelCase(spec.LexModeNameDefault.String()) {
				var b strings.Builder
				fmt.Fprintf(&b, "%+v", ids[0])
				for _, id := range ids[1:] {
					fmt.Fprintf(&b, ", %+v", id)
				}
				return fmt.Errorf("these identifiers are treated as the same. please use the same spelling as predefined '%v': %v", spec.LexModeNameDefault, b.String())
			}
			return nil
		})
		errs := append(kindErrs, modeErrs...)
		if len(errs) > 0 {
			var b strings.Builder
			fmt.Fprintf(&b, "%v", errs[0])
			for _, err := range errs[1:] {
				fmt.Fprintf(&b, "\n%v", err)
			}
			// Use the "%s" verb so a literal '%' inside a user-supplied
			// identifier cannot be misinterpreted as a format directive
			// (this also satisfies go vet's printf check).
			return fmt.Errorf("%s", b.String())
		}
	}

	return nil
}

// findSpellingInconsistenciesErrors converts each group of inconsistently
// spelled identifiers into an error. When hook is non-nil and returns a
// non-nil error for a group, that error replaces the generic one.
func findSpellingInconsistenciesErrors(ids []string, hook func(ids []string) error) []error {
	duplicated := FindSpellingInconsistencies(ids)
	if len(duplicated) == 0 {
		return nil
	}

	var errs []error
	for _, dup := range duplicated {
		if hook != nil {
			err := hook(dup)
			if err != nil {
				errs = append(errs, err)
				continue
			}
		}

		var b strings.Builder
		fmt.Fprintf(&b, "%+v", dup[0])
		for _, id := range dup[1:] {
			fmt.Fprintf(&b, ", %+v", id)
		}
		err := fmt.Errorf("these identifiers are treated as the same. please use the same spelling: %v", b.String())
		errs = append(errs, err)
	}

	return errs
}

// FindSpellingInconsistencies finds spelling inconsistencies in identifiers. The identifiers are considered to be the same
// if they are spelled the same when expressed in UpperCamelCase. For example, `left_paren` and `LeftParen` are spelled the same
// in UpperCamelCase. Thus they are considered to be spelling inconsistency.
func FindSpellingInconsistencies(ids []string) [][]string {
	m := map[string][]string{}
	for _, id := range removeDuplicates(ids) {
		c := SnakeCaseToUpperCamelCase(id)
		m[c] = append(m[c], id)
	}

	var duplicated [][]string
	for _, camels := range m {
		if len(camels) == 1 {
			continue
		}
		duplicated = append(duplicated, camels)
	}

	// Sort groups and their members for deterministic output (map
	// iteration order is random).
	for _, dup := range duplicated {
		sort.Slice(dup, func(i, j int) bool {
			return dup[i] < dup[j]
		})
	}
	sort.Slice(duplicated, func(i, j int) bool {
		return duplicated[i][0] < duplicated[j][0]
	})

	return duplicated
}

// removeDuplicates returns the unique values of s in unspecified order.
func removeDuplicates(s []string) []string {
	m := map[string]struct{}{}
	for _, v := range s {
		m[v] = struct{}{}
	}

	var unique []string
	for v := range m {
		unique = append(unique, v)
	}

	return unique
}

// SnakeCaseToUpperCamelCase converts a snake_case identifier to
// UpperCamelCase (e.g. "left_paren" -> "LeftParen").
func SnakeCaseToUpperCamelCase(snake string) string {
	elems := strings.Split(snake, "_")
	for i, e := range elems {
		if len(e) == 0 {
			continue
		}
		elems[i] = strings.ToUpper(string(e[0])) + e[1:]
	}

	return strings.Join(elems, "")
}