diff options
author | EuAndreh <eu@euandre.org> | 2024-12-10 12:29:03 -0300 |
---|---|---|
committer | EuAndreh <eu@euandre.org> | 2024-12-10 12:29:03 -0300 |
commit | 8359c047aaebe274a2d811d61922b571ca7d10df (patch) | |
tree | 070e0ed93d27a842776ada805eeb4270e7e3c806 /grammar/lexical/compiler.go | |
parent | Start building test files (diff) | |
download | cotia-8359c047aaebe274a2d811d61922b571ca7d10df.tar.gz cotia-8359c047aaebe274a2d811d61922b571ca7d10df.tar.xz |
Namespace packages with "urubu/"
Diffstat (limited to 'grammar/lexical/compiler.go')
-rw-r--r-- | grammar/lexical/compiler.go | 413 |
1 files changed, 0 insertions, 413 deletions
diff --git a/grammar/lexical/compiler.go b/grammar/lexical/compiler.go deleted file mode 100644 index 06e4068..0000000 --- a/grammar/lexical/compiler.go +++ /dev/null @@ -1,413 +0,0 @@ -package lexical - -import ( - "bytes" - "fmt" - - "compressor" - "grammar/lexical/dfa" - psr "grammar/lexical/parser" - spec "spec/grammar" -) - -type CompileError struct { - Kind spec.LexKindName - Fragment bool - Cause error - Detail string -} - -func Compile(lexspec *LexSpec, compLv int) (*spec.LexicalSpec, error, []*CompileError) { - err := lexspec.Validate() - if err != nil { - return nil, fmt.Errorf("invalid lexical specification:\n%w", err), nil - } - - modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries) - - modeSpecs := []*spec.CompiledLexModeSpec{ - nil, - } - for i, es := range modeEntries[1:] { - modeName := modeNames[i+1] - modeSpec, err, cerrs := compile(es, modeName2ID, fragmetns, compLv) - if err != nil { - return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err), cerrs - } - modeSpecs = append(modeSpecs, modeSpec) - } - - var kindNames []spec.LexKindName - var name2ID map[spec.LexKindName]spec.LexKindID - { - name2ID = map[spec.LexKindName]spec.LexKindID{} - id := spec.LexKindIDMin - for _, modeSpec := range modeSpecs[1:] { - for _, name := range modeSpec.KindNames[1:] { - if _, ok := name2ID[name]; ok { - continue - } - name2ID[name] = id - id++ - } - } - - kindNames = make([]spec.LexKindName, len(name2ID)+1) - for name, id := range name2ID { - kindNames[id] = name - } - } - - var kindIDs [][]spec.LexKindID - { - kindIDs = make([][]spec.LexKindID, len(modeSpecs)) - for i, modeSpec := range modeSpecs[1:] { - ids := make([]spec.LexKindID, len(modeSpec.KindNames)) - for modeID, name := range modeSpec.KindNames { - if modeID == 0 { - continue - } - ids[modeID] = name2ID[name] - } - kindIDs[i+1] = ids - } - } - - return &spec.LexicalSpec{ - InitialModeID: spec.LexModeIDDefault, - ModeNames: modeNames, - KindNames: kindNames, - KindIDs: kindIDs, - CompressionLevel: compLv, - Specs: modeSpecs, - }, nil, nil -} - -func groupEntriesByLexMode(entries []*LexEntry) ([][]*LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[spec.LexKindName]*LexEntry) { - modeNames := []spec.LexModeName{ - spec.LexModeNameNil, - spec.LexModeNameDefault, - } - modeName2ID := map[spec.LexModeName]spec.LexModeID{ - spec.LexModeNameNil: spec.LexModeIDNil, - spec.LexModeNameDefault: spec.LexModeIDDefault, - } - lastModeID := spec.LexModeIDDefault - modeEntries := [][]*LexEntry{ - nil, - {}, - } - fragments := map[spec.LexKindName]*LexEntry{} - for _, e := range entries { - if e.Fragment { - fragments[e.Kind] = e - continue - } - ms := e.Modes - if len(ms) == 0 { - ms = []spec.LexModeName{ - spec.LexModeNameDefault, - } - } - for _, modeName := range ms { - modeID, ok := modeName2ID[modeName] - if !ok { - modeID = lastModeID + 1 - lastModeID = modeID - modeName2ID[modeName] = modeID - modeNames = append(modeNames, modeName) - modeEntries = append(modeEntries, []*LexEntry{}) - } - modeEntries[modeID] = append(modeEntries[modeID], e) - } - } - return modeEntries, modeNames, modeName2ID, fragments -} - -func compile( - entries []*LexEntry, - modeName2ID map[spec.LexModeName]spec.LexModeID, - fragments map[spec.LexKindName]*LexEntry, - compLv int, -) (*spec.CompiledLexModeSpec, error, []*CompileError) { - var kindNames []spec.LexKindName - kindIDToName := map[spec.LexModeKindID]spec.LexKindName{} - var patterns map[spec.LexModeKindID][]byte - { - kindNames = append(kindNames, spec.LexKindNameNil) - patterns = map[spec.LexModeKindID][]byte{} - for i, e := range entries { - kindID := spec.LexModeKindID(i + 1) - - kindNames = append(kindNames, e.Kind) - kindIDToName[kindID] = e.Kind - patterns[kindID] = []byte(e.Pattern) - } - } - - push := []spec.LexModeID{ - spec.LexModeIDNil, - } - pop := []int{ - 0, - } - for _, e := range entries { - pushV := spec.LexModeIDNil - if e.Push != "" { - pushV = modeName2ID[e.Push] - } - push = append(push, pushV) - popV := 0 - if e.Pop { - popV = 1 - } - pop = append(pop, popV) - } - - fragmentPatterns := map[spec.LexKindName][]byte{} - for k, e := range fragments { - fragmentPatterns[k] = []byte(e.Pattern) - } - - fragmentCPTrees := make(map[spec.LexKindName]psr.CPTree, len(fragmentPatterns)) - { - var cerrs []*CompileError - for kind, pat := range fragmentPatterns { - p := psr.NewParser(kind, bytes.NewReader(pat)) - t, err := p.Parse() - if err != nil { - if err == psr.ParseErr { - detail, cause := p.Error() - cerrs = append(cerrs, &CompileError{ - Kind: kind, - Fragment: true, - Cause: cause, - Detail: detail, - }) - } else { - cerrs = append(cerrs, &CompileError{ - Kind: kind, - Fragment: true, - Cause: err, - }) - } - continue - } - fragmentCPTrees[kind] = t - } - if len(cerrs) > 0 { - return nil, fmt.Errorf("compile error"), cerrs - } - - err := psr.CompleteFragments(fragmentCPTrees) - if err != nil { - if err == psr.ParseErr { - for _, frag := range fragmentCPTrees { - kind, frags, err := frag.Describe() - if err != nil { - return nil, err, nil - } - - cerrs = append(cerrs, &CompileError{ - Kind: kind, - Fragment: true, - Cause: fmt.Errorf("fragment contains undefined fragments or cycles"), - Detail: fmt.Sprintf("%v", frags), - }) - } - - return nil, fmt.Errorf("compile error"), cerrs - } - - return nil, err, nil - } - } - - cpTrees := map[spec.LexModeKindID]psr.CPTree{} - { - pats := make([]*psr.PatternEntry, len(patterns)+1) - pats[spec.LexModeKindIDNil] = &psr.PatternEntry{ - ID: spec.LexModeKindIDNil, - } - for id, pattern := range patterns { - pats[id] = &psr.PatternEntry{ - ID: id, - Pattern: pattern, - } - } - - var cerrs []*CompileError - for _, pat := range pats { - if pat.ID == spec.LexModeKindIDNil { - continue - } - - p := psr.NewParser(kindIDToName[pat.ID], bytes.NewReader(pat.Pattern)) - t, err := p.Parse() - if err != nil { - if err == psr.ParseErr { - detail, cause := p.Error() - cerrs = append(cerrs, &CompileError{ - Kind: kindIDToName[pat.ID], - Fragment: false, - Cause: cause, - Detail: detail, - }) - } else { - cerrs = append(cerrs, &CompileError{ - Kind: kindIDToName[pat.ID], - Fragment: false, - Cause: err, - }) - } - continue - } - - complete, err := psr.ApplyFragments(t, fragmentCPTrees) - if err != nil { - return nil, err, nil - } - if !complete { - _, frags, err := t.Describe() - if err != nil { - return nil, err, nil - } - - cerrs = append(cerrs, &CompileError{ - Kind: kindIDToName[pat.ID], - Fragment: false, - Cause: fmt.Errorf("pattern contains undefined fragments"), - Detail: fmt.Sprintf("%v", frags), - }) - continue - } - - cpTrees[pat.ID] = t - } - if len(cerrs) > 0 { - return nil, fmt.Errorf("compile error"), cerrs - } - } - - var tranTab *spec.TransitionTable - { - root, symTab, err := dfa.ConvertCPTreeToByteTree(cpTrees) - if err != nil { - return nil, err, nil - } - d := dfa.GenDFA(root, symTab) - tranTab, err = dfa.GenTransitionTable(d) - if err != nil { - return nil, err, nil - } - } - - var err error - switch compLv { - case 2: - tranTab, err = compressTransitionTableLv2(tranTab) - if err != nil { - return nil, err, nil - } - case 1: - tranTab, err = compressTransitionTableLv1(tranTab) - if err != nil { - return nil, err, nil - } - } - - return &spec.CompiledLexModeSpec{ - KindNames: kindNames, - Push: push, - Pop: pop, - DFA: tranTab, - }, nil, nil -} - -const ( - CompressionLevelMin = 0 - CompressionLevelMax = 2 -) - -func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { - ueTab := compressor.NewUniqueEntriesTable() - { - orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount) - if err != nil { - return nil, err - } - err = ueTab.Compress(orig) - if err != nil { - return nil, err - } - } - - rdTab := compressor.NewRowDisplacementTable(0) - { - orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount) - if err != nil { - return nil, err - } - err = rdTab.Compress(orig) - if err != nil { - return nil, err - } - } - - tranTab.Transition = &spec.UniqueEntriesTable{ - UniqueEntries: &spec.RowDisplacementTable{ - OriginalRowCount: rdTab.OriginalRowCount, - OriginalColCount: rdTab.OriginalColCount, - EmptyValue: spec.StateIDNil, - Entries: convertIntSliceToStateIDSlice(rdTab.Entries), - Bounds: rdTab.Bounds, - RowDisplacement: rdTab.RowDisplacement, - }, - RowNums: ueTab.RowNums, - OriginalRowCount: ueTab.OriginalRowCount, - OriginalColCount: ueTab.OriginalColCount, - } - tranTab.UncompressedTransition = nil - - return tranTab, nil -} - -func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) { - ueTab := compressor.NewUniqueEntriesTable() - { - orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount) - if err != nil { - return nil, err - } - err = ueTab.Compress(orig) - if err != nil { - return nil, err - } - } - - tranTab.Transition = &spec.UniqueEntriesTable{ - UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries), - RowNums: ueTab.RowNums, - OriginalRowCount: ueTab.OriginalRowCount, - OriginalColCount: ueTab.OriginalColCount, - } - tranTab.UncompressedTransition = nil - - return tranTab, nil -} - -func convertStateIDSliceToIntSlice(s []spec.StateID) []int { - is := make([]int, len(s)) - for i, v := range s { - is[i] = v.Int() - } - return is -} - -func convertIntSliceToStateIDSlice(s []int) []spec.StateID { - ss := make([]spec.StateID, len(s)) - for i, v := range s { - ss[i] = spec.StateID(v) - } - return ss -} |