diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-06-19 00:38:27 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-06-19 00:38:27 +0900 |
commit | 52379a9e26578eb1105e73839db8157e24ba2d4c (patch) | |
tree | 1b03f2da6d9a67bd69823d01a050c2a95f1e9e55 | |
parent | Add SLR parsing table generator (diff) | |
download | cotia-52379a9e26578eb1105e73839db8157e24ba2d4c.tar.gz cotia-52379a9e26578eb1105e73839db8157e24ba2d4c.tar.xz |
Add driver
-rw-r--r-- | driver/parser.go | 155 | ||||
-rw-r--r-- | driver/parser_test.go | 50 | ||||
-rw-r--r-- | grammar/grammar.go | 99 | ||||
-rw-r--r-- | spec/grammar.go | 33 |
4 files changed, 337 insertions, 0 deletions
diff --git a/driver/parser.go b/driver/parser.go new file mode 100644 index 0000000..12081bd --- /dev/null +++ b/driver/parser.go @@ -0,0 +1,155 @@ +package driver + +import ( + "fmt" + "io" + + mldriver "github.com/nihei9/maleeni/driver" + "github.com/nihei9/vartan/spec" +) + +type CST struct { + KindName string + Text string + Children []*CST +} + +func printCST(cst *CST, depth int) { + for i := 0; i < depth; i++ { + fmt.Printf(" ") + } + fmt.Printf("%v", cst.KindName) + if cst.Text != "" { + fmt.Printf(` "%v"`, cst.Text) + } + fmt.Printf("\n") + for _, c := range cst.Children { + printCST(c, depth+1) + } +} + +type semanticFrame struct { + cst *CST +} + +type Parser struct { + gram *spec.CompiledGrammar + lex *mldriver.Lexer + stateStack []int + semStack []*semanticFrame + cst *CST +} + +func NewParser(gram *spec.CompiledGrammar, src io.Reader) (*Parser, error) { + lex, err := mldriver.NewLexer(gram.LexicalSpecification.Maleeni.Spec, src) + if err != nil { + return nil, err + } + + return &Parser{ + gram: gram, + lex: lex, + stateStack: []int{}, + semStack: []*semanticFrame{}, + }, nil +} + +func (p *Parser) Parse() error { + termCount := p.gram.ParsingTable.TerminalCount + p.push(p.gram.ParsingTable.InitialState) + tok, err := p.lex.Next() + if err != nil { + return err + } + if tok.Invalid { + return fmt.Errorf("invalid token: %+v", tok) + } + for { + var tsym int + if tok.EOF { + tsym = p.gram.ParsingTable.EOFSymbol + } else { + tsym = p.gram.LexicalSpecification.Maleeni.KindToTerminal[tok.Mode.Int()][tok.Kind] + } + act := p.gram.ParsingTable.Action[p.top()*termCount+tsym] + switch { + case act < 0: // Shift + tokText := tok.Text() + tok, err = p.shift(act * -1) + if err != nil { + return err + } + // semantic action + p.semStack = append(p.semStack, &semanticFrame{ + cst: &CST{ + KindName: p.gram.ParsingTable.Terminals[tsym], + Text: tokText, + }, + }) + case act > 0: // Reduce + accepted := p.reduce(act) + if accepted { + p.cst = p.semStack[len(p.semStack)-1].cst + return nil + } + // semantic action + prodNum := act + lhs := p.gram.ParsingTable.LHSSymbols[prodNum] + n := p.gram.ParsingTable.AlternativeSymbolCounts[prodNum] + children := []*CST{} + for _, f := range p.semStack[len(p.semStack)-n:] { + children = append(children, f.cst) + } + p.semStack = p.semStack[:len(p.semStack)-n] + p.semStack = append(p.semStack, &semanticFrame{ + cst: &CST{ + KindName: p.gram.ParsingTable.NonTerminals[lhs], + Children: children, + }, + }) + default: + return fmt.Errorf("unexpected token: %v", tok) + } + } +} + +func (p *Parser) shift(nextState int) (*mldriver.Token, error) { + p.push(nextState) + tok, err := p.lex.Next() + if err != nil { + return nil, err + } + if tok.Invalid { + return nil, fmt.Errorf("invalid token: %+v", tok) + } + return tok, nil +} + +func (p *Parser) reduce(prodNum int) bool { + tab := p.gram.ParsingTable + lhs := tab.LHSSymbols[prodNum] + if lhs == tab.LHSSymbols[tab.StartProduction] { + return true + } + n := tab.AlternativeSymbolCounts[prodNum] + p.pop(n) + nextState := tab.GoTo[p.top()*tab.NonTerminalCount+lhs] + p.push(nextState) + return false +} + +func (p *Parser) top() int { + return p.stateStack[len(p.stateStack)-1] +} + +func (p *Parser) push(state int) { + p.stateStack = append(p.stateStack, state) +} + +func (p *Parser) pop(n int) { + p.stateStack = p.stateStack[:len(p.stateStack)-n] +} + +func (p *Parser) GetCST() *CST { + return p.cst +} diff --git a/driver/parser_test.go b/driver/parser_test.go new file mode 100644 index 0000000..bc6a34e --- /dev/null +++ b/driver/parser_test.go @@ -0,0 +1,50 @@ +package driver + +import ( + "strings" + "testing" + + "github.com/nihei9/vartan/grammar" + "github.com/nihei9/vartan/spec" +) + +func TestParser_Parse(t *testing.T) { + specSrc := ` +expr + : expr "\+" term + | term + ; +term + : term "\*" factor + | factor + ; +factor + : "\(" expr "\)" + | id + ; +id: "[A-Za-z_][0-9A-Za-z_]*"; +` + ast, err := spec.Parse(strings.NewReader(specSrc)) + if err != nil { + t.Fatal(err) + } + g, err := grammar.NewGrammar(ast) + if err != nil { + t.Fatal(err) + } + gram, err := grammar.Compile(g) + if err != nil { + t.Fatal(err) + } + src := `(a+(b+c))*d+e` + p, err := NewParser(gram, strings.NewReader(src)) + if err != nil { + t.Fatal(err) + } + err = p.Parse() + if err != nil { + t.Fatal(err) + } + + printCST(p.GetCST(), 0) +} diff --git a/grammar/grammar.go b/grammar/grammar.go index ce0ff19..2a7953a 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -3,6 +3,7 @@ package grammar import ( "fmt" + mlcompiler "github.com/nihei9/maleeni/compiler" mlspec "github.com/nihei9/maleeni/spec" "github.com/nihei9/vartan/spec" ) @@ -156,3 +157,101 @@ func isLexicalProduction(prod *spec.ProductionNode) bool { } return false } + +func Compile(gram *Grammar) (*spec.CompiledGrammar, error) { + lexSpec, err := mlcompiler.Compile(gram.lexSpec, mlcompiler.CompressionLevel(mlcompiler.CompressionLevelMax)) + if err != nil { + return nil, err + } + + kind2Term := make([][]int, len(lexSpec.Modes)) + for modeNum, spec := range lexSpec.Specs { + if modeNum == 0 { + kind2Term[0] = nil + continue + } + rec := make([]int, len(spec.Kinds)) + for n, k := range spec.Kinds { + if n == 0 { + rec[0] = symbolNil.num().Int() + continue + } + sym, ok := gram.symbolTable.toSymbol(k.String()) + if !ok { + return nil, fmt.Errorf("terminal symbol '%v' (in '%v' mode) is not found in a symbol table", k, lexSpec.Modes[modeNum]) + } + rec[n] = sym.num().Int() + } + kind2Term[modeNum] = rec + } + + terms, err := gram.symbolTable.getTerminalTexts() + if err != nil { + return nil, err + } + + nonTerms, err := gram.symbolTable.getNonTerminalTexts() + if err != nil { + return nil, err + } + + firstSet, err := genFirstSet(gram.productionSet) + if err != nil { + return nil, err + } + + followSet, err := genFollowSet(gram.productionSet, firstSet) + if err != nil { + return nil, err + } + + lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol) + if err != nil { + return nil, err + } + + tab, err := genSLRParsingTable(lr0, gram.productionSet, followSet, len(terms), len(nonTerms)) + if err != nil { + return nil, err + } + + action := make([]int, len(tab.actionTable)) + for i, e := range tab.actionTable { + action[i] = int(e) + } + goTo := make([]int, len(tab.goToTable)) + for i, e := range tab.goToTable { + goTo[i] = int(e) + } + + lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1) + altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1) + for _, p := range gram.productionSet.getAllProductions() { + lhsSyms[p.num] = p.lhs.num().Int() + altSymCounts[p.num] = p.rhsLen + } + + return &spec.CompiledGrammar{ + LexicalSpecification: &spec.LexicalSpecification{ + Lexer: "maleeni", + Maleeni: &spec.Maleeni{ + Spec: lexSpec, + KindToTerminal: kind2Term, + }, + }, + ParsingTable: &spec.ParsingTable{ + Action: action, + GoTo: goTo, + StateCount: tab.stateCount, + InitialState: tab.InitialState.Int(), + StartProduction: productionNumStart.Int(), + LHSSymbols: lhsSyms, + AlternativeSymbolCounts: altSymCounts, + Terminals: terms, + TerminalCount: tab.terminalCount, + NonTerminals: nonTerms, + NonTerminalCount: tab.nonTerminalCount, + EOFSymbol: symbolEOF.num().Int(), + }, + }, nil +} diff --git a/spec/grammar.go b/spec/grammar.go new file mode 100644 index 0000000..653b50d --- /dev/null +++ b/spec/grammar.go @@ -0,0 +1,33 @@ +package spec + +import mlspec "github.com/nihei9/maleeni/spec" + +type CompiledGrammar struct { + LexicalSpecification *LexicalSpecification `json:"lexical_specification"` + ParsingTable *ParsingTable `json:"parsing_table"` +} + +type LexicalSpecification struct { + Lexer string `json:"lexer"` + Maleeni *Maleeni `json:"maleeni"` +} + +type Maleeni struct { + Spec *mlspec.CompiledLexSpec `json:"spec"` + KindToTerminal [][]int `json:"kind_to_terminal"` +} + +type ParsingTable struct { + Action []int `json:"action"` + GoTo []int `json:"goto"` + StateCount int `json:"state_count"` + InitialState int `json:"initial_state"` + StartProduction int `json:"start_production"` + LHSSymbols []int `json:"lhs_symbols"` + AlternativeSymbolCounts []int `json:"alternative_symbol_counts"` + Terminals []string `json:"terminals"` + TerminalCount int `json:"terminal_count"` + NonTerminals []string `json:"non_terminals"` + NonTerminalCount int `json:"non_terminal_count"` + EOFSymbol int `json:"eof_symbol"` +} |