aboutsummaryrefslogtreecommitdiff
path: root/grammar/lexical/compiler.go
diff options
context:
space:
mode:
authorEuAndreh <eu@euandre.org>2024-12-10 12:29:03 -0300
committerEuAndreh <eu@euandre.org>2024-12-10 12:29:03 -0300
commit8359c047aaebe274a2d811d61922b571ca7d10df (patch)
tree070e0ed93d27a842776ada805eeb4270e7e3c806 /grammar/lexical/compiler.go
parentStart building test files (diff)
downloadcotia-8359c047aaebe274a2d811d61922b571ca7d10df.tar.gz
cotia-8359c047aaebe274a2d811d61922b571ca7d10df.tar.xz
Namespace packages with "urubu/"
Diffstat (limited to 'grammar/lexical/compiler.go')
-rw-r--r--grammar/lexical/compiler.go413
1 files changed, 0 insertions, 413 deletions
diff --git a/grammar/lexical/compiler.go b/grammar/lexical/compiler.go
deleted file mode 100644
index 06e4068..0000000
--- a/grammar/lexical/compiler.go
+++ /dev/null
@@ -1,413 +0,0 @@
-package lexical
-
-import (
- "bytes"
- "fmt"
-
- "compressor"
- "grammar/lexical/dfa"
- psr "grammar/lexical/parser"
- spec "spec/grammar"
-)
-
-type CompileError struct {
- Kind spec.LexKindName
- Fragment bool
- Cause error
- Detail string
-}
-
-func Compile(lexspec *LexSpec, compLv int) (*spec.LexicalSpec, error, []*CompileError) {
- err := lexspec.Validate()
- if err != nil {
- return nil, fmt.Errorf("invalid lexical specification:\n%w", err), nil
- }
-
- modeEntries, modeNames, modeName2ID, fragmetns := groupEntriesByLexMode(lexspec.Entries)
-
- modeSpecs := []*spec.CompiledLexModeSpec{
- nil,
- }
- for i, es := range modeEntries[1:] {
- modeName := modeNames[i+1]
- modeSpec, err, cerrs := compile(es, modeName2ID, fragmetns, compLv)
- if err != nil {
- return nil, fmt.Errorf("failed to compile in %v mode: %w", modeName, err), cerrs
- }
- modeSpecs = append(modeSpecs, modeSpec)
- }
-
- var kindNames []spec.LexKindName
- var name2ID map[spec.LexKindName]spec.LexKindID
- {
- name2ID = map[spec.LexKindName]spec.LexKindID{}
- id := spec.LexKindIDMin
- for _, modeSpec := range modeSpecs[1:] {
- for _, name := range modeSpec.KindNames[1:] {
- if _, ok := name2ID[name]; ok {
- continue
- }
- name2ID[name] = id
- id++
- }
- }
-
- kindNames = make([]spec.LexKindName, len(name2ID)+1)
- for name, id := range name2ID {
- kindNames[id] = name
- }
- }
-
- var kindIDs [][]spec.LexKindID
- {
- kindIDs = make([][]spec.LexKindID, len(modeSpecs))
- for i, modeSpec := range modeSpecs[1:] {
- ids := make([]spec.LexKindID, len(modeSpec.KindNames))
- for modeID, name := range modeSpec.KindNames {
- if modeID == 0 {
- continue
- }
- ids[modeID] = name2ID[name]
- }
- kindIDs[i+1] = ids
- }
- }
-
- return &spec.LexicalSpec{
- InitialModeID: spec.LexModeIDDefault,
- ModeNames: modeNames,
- KindNames: kindNames,
- KindIDs: kindIDs,
- CompressionLevel: compLv,
- Specs: modeSpecs,
- }, nil, nil
-}
-
-func groupEntriesByLexMode(entries []*LexEntry) ([][]*LexEntry, []spec.LexModeName, map[spec.LexModeName]spec.LexModeID, map[spec.LexKindName]*LexEntry) {
- modeNames := []spec.LexModeName{
- spec.LexModeNameNil,
- spec.LexModeNameDefault,
- }
- modeName2ID := map[spec.LexModeName]spec.LexModeID{
- spec.LexModeNameNil: spec.LexModeIDNil,
- spec.LexModeNameDefault: spec.LexModeIDDefault,
- }
- lastModeID := spec.LexModeIDDefault
- modeEntries := [][]*LexEntry{
- nil,
- {},
- }
- fragments := map[spec.LexKindName]*LexEntry{}
- for _, e := range entries {
- if e.Fragment {
- fragments[e.Kind] = e
- continue
- }
- ms := e.Modes
- if len(ms) == 0 {
- ms = []spec.LexModeName{
- spec.LexModeNameDefault,
- }
- }
- for _, modeName := range ms {
- modeID, ok := modeName2ID[modeName]
- if !ok {
- modeID = lastModeID + 1
- lastModeID = modeID
- modeName2ID[modeName] = modeID
- modeNames = append(modeNames, modeName)
- modeEntries = append(modeEntries, []*LexEntry{})
- }
- modeEntries[modeID] = append(modeEntries[modeID], e)
- }
- }
- return modeEntries, modeNames, modeName2ID, fragments
-}
-
-func compile(
- entries []*LexEntry,
- modeName2ID map[spec.LexModeName]spec.LexModeID,
- fragments map[spec.LexKindName]*LexEntry,
- compLv int,
-) (*spec.CompiledLexModeSpec, error, []*CompileError) {
- var kindNames []spec.LexKindName
- kindIDToName := map[spec.LexModeKindID]spec.LexKindName{}
- var patterns map[spec.LexModeKindID][]byte
- {
- kindNames = append(kindNames, spec.LexKindNameNil)
- patterns = map[spec.LexModeKindID][]byte{}
- for i, e := range entries {
- kindID := spec.LexModeKindID(i + 1)
-
- kindNames = append(kindNames, e.Kind)
- kindIDToName[kindID] = e.Kind
- patterns[kindID] = []byte(e.Pattern)
- }
- }
-
- push := []spec.LexModeID{
- spec.LexModeIDNil,
- }
- pop := []int{
- 0,
- }
- for _, e := range entries {
- pushV := spec.LexModeIDNil
- if e.Push != "" {
- pushV = modeName2ID[e.Push]
- }
- push = append(push, pushV)
- popV := 0
- if e.Pop {
- popV = 1
- }
- pop = append(pop, popV)
- }
-
- fragmentPatterns := map[spec.LexKindName][]byte{}
- for k, e := range fragments {
- fragmentPatterns[k] = []byte(e.Pattern)
- }
-
- fragmentCPTrees := make(map[spec.LexKindName]psr.CPTree, len(fragmentPatterns))
- {
- var cerrs []*CompileError
- for kind, pat := range fragmentPatterns {
- p := psr.NewParser(kind, bytes.NewReader(pat))
- t, err := p.Parse()
- if err != nil {
- if err == psr.ParseErr {
- detail, cause := p.Error()
- cerrs = append(cerrs, &CompileError{
- Kind: kind,
- Fragment: true,
- Cause: cause,
- Detail: detail,
- })
- } else {
- cerrs = append(cerrs, &CompileError{
- Kind: kind,
- Fragment: true,
- Cause: err,
- })
- }
- continue
- }
- fragmentCPTrees[kind] = t
- }
- if len(cerrs) > 0 {
- return nil, fmt.Errorf("compile error"), cerrs
- }
-
- err := psr.CompleteFragments(fragmentCPTrees)
- if err != nil {
- if err == psr.ParseErr {
- for _, frag := range fragmentCPTrees {
- kind, frags, err := frag.Describe()
- if err != nil {
- return nil, err, nil
- }
-
- cerrs = append(cerrs, &CompileError{
- Kind: kind,
- Fragment: true,
- Cause: fmt.Errorf("fragment contains undefined fragments or cycles"),
- Detail: fmt.Sprintf("%v", frags),
- })
- }
-
- return nil, fmt.Errorf("compile error"), cerrs
- }
-
- return nil, err, nil
- }
- }
-
- cpTrees := map[spec.LexModeKindID]psr.CPTree{}
- {
- pats := make([]*psr.PatternEntry, len(patterns)+1)
- pats[spec.LexModeKindIDNil] = &psr.PatternEntry{
- ID: spec.LexModeKindIDNil,
- }
- for id, pattern := range patterns {
- pats[id] = &psr.PatternEntry{
- ID: id,
- Pattern: pattern,
- }
- }
-
- var cerrs []*CompileError
- for _, pat := range pats {
- if pat.ID == spec.LexModeKindIDNil {
- continue
- }
-
- p := psr.NewParser(kindIDToName[pat.ID], bytes.NewReader(pat.Pattern))
- t, err := p.Parse()
- if err != nil {
- if err == psr.ParseErr {
- detail, cause := p.Error()
- cerrs = append(cerrs, &CompileError{
- Kind: kindIDToName[pat.ID],
- Fragment: false,
- Cause: cause,
- Detail: detail,
- })
- } else {
- cerrs = append(cerrs, &CompileError{
- Kind: kindIDToName[pat.ID],
- Fragment: false,
- Cause: err,
- })
- }
- continue
- }
-
- complete, err := psr.ApplyFragments(t, fragmentCPTrees)
- if err != nil {
- return nil, err, nil
- }
- if !complete {
- _, frags, err := t.Describe()
- if err != nil {
- return nil, err, nil
- }
-
- cerrs = append(cerrs, &CompileError{
- Kind: kindIDToName[pat.ID],
- Fragment: false,
- Cause: fmt.Errorf("pattern contains undefined fragments"),
- Detail: fmt.Sprintf("%v", frags),
- })
- continue
- }
-
- cpTrees[pat.ID] = t
- }
- if len(cerrs) > 0 {
- return nil, fmt.Errorf("compile error"), cerrs
- }
- }
-
- var tranTab *spec.TransitionTable
- {
- root, symTab, err := dfa.ConvertCPTreeToByteTree(cpTrees)
- if err != nil {
- return nil, err, nil
- }
- d := dfa.GenDFA(root, symTab)
- tranTab, err = dfa.GenTransitionTable(d)
- if err != nil {
- return nil, err, nil
- }
- }
-
- var err error
- switch compLv {
- case 2:
- tranTab, err = compressTransitionTableLv2(tranTab)
- if err != nil {
- return nil, err, nil
- }
- case 1:
- tranTab, err = compressTransitionTableLv1(tranTab)
- if err != nil {
- return nil, err, nil
- }
- }
-
- return &spec.CompiledLexModeSpec{
- KindNames: kindNames,
- Push: push,
- Pop: pop,
- DFA: tranTab,
- }, nil, nil
-}
-
-const (
- CompressionLevelMin = 0
- CompressionLevelMax = 2
-)
-
-func compressTransitionTableLv2(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
- ueTab := compressor.NewUniqueEntriesTable()
- {
- orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
- if err != nil {
- return nil, err
- }
- err = ueTab.Compress(orig)
- if err != nil {
- return nil, err
- }
- }
-
- rdTab := compressor.NewRowDisplacementTable(0)
- {
- orig, err := compressor.NewOriginalTable(ueTab.UniqueEntries, ueTab.OriginalColCount)
- if err != nil {
- return nil, err
- }
- err = rdTab.Compress(orig)
- if err != nil {
- return nil, err
- }
- }
-
- tranTab.Transition = &spec.UniqueEntriesTable{
- UniqueEntries: &spec.RowDisplacementTable{
- OriginalRowCount: rdTab.OriginalRowCount,
- OriginalColCount: rdTab.OriginalColCount,
- EmptyValue: spec.StateIDNil,
- Entries: convertIntSliceToStateIDSlice(rdTab.Entries),
- Bounds: rdTab.Bounds,
- RowDisplacement: rdTab.RowDisplacement,
- },
- RowNums: ueTab.RowNums,
- OriginalRowCount: ueTab.OriginalRowCount,
- OriginalColCount: ueTab.OriginalColCount,
- }
- tranTab.UncompressedTransition = nil
-
- return tranTab, nil
-}
-
-func compressTransitionTableLv1(tranTab *spec.TransitionTable) (*spec.TransitionTable, error) {
- ueTab := compressor.NewUniqueEntriesTable()
- {
- orig, err := compressor.NewOriginalTable(convertStateIDSliceToIntSlice(tranTab.UncompressedTransition), tranTab.ColCount)
- if err != nil {
- return nil, err
- }
- err = ueTab.Compress(orig)
- if err != nil {
- return nil, err
- }
- }
-
- tranTab.Transition = &spec.UniqueEntriesTable{
- UncompressedUniqueEntries: convertIntSliceToStateIDSlice(ueTab.UniqueEntries),
- RowNums: ueTab.RowNums,
- OriginalRowCount: ueTab.OriginalRowCount,
- OriginalColCount: ueTab.OriginalColCount,
- }
- tranTab.UncompressedTransition = nil
-
- return tranTab, nil
-}
-
-func convertStateIDSliceToIntSlice(s []spec.StateID) []int {
- is := make([]int, len(s))
- for i, v := range s {
- is[i] = v.Int()
- }
- return is
-}
-
-func convertIntSliceToStateIDSlice(s []int) []spec.StateID {
- ss := make([]spec.StateID, len(s))
- for i, v := range s {
- ss[i] = spec.StateID(v)
- }
- return ss
-}