author     Ryo Nihei <nihei.dev@gmail.com>  2021-06-19 00:38:27 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-06-19 00:38:27 +0900
commit     52379a9e26578eb1105e73839db8157e24ba2d4c (patch)
tree       1b03f2da6d9a67bd69823d01a050c2a95f1e9e55
parent     Add SLR parsing table generator (diff)
download   cotia-52379a9e26578eb1105e73839db8157e24ba2d4c.tar.gz
           cotia-52379a9e26578eb1105e73839db8157e24ba2d4c.tar.xz
Add driver
-rw-r--r--  driver/parser.go        155
-rw-r--r--  driver/parser_test.go    50
-rw-r--r--  grammar/grammar.go       99
-rw-r--r--  spec/grammar.go          33
4 files changed, 337 insertions(+), 0 deletions(-)
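
This commit adds the parse driver that executes the SLR tables generated by the previous commit: driver/parser.go pulls tokens from a maleeni lexer, walks the action/goto tables, and builds a concrete syntax tree (CST). The end-to-end flow is exercised by driver/parser_test.go below; the following is a minimal usage sketch from outside the driver package, assuming the import paths visible in this diff. The toy grammar spec and the input "a+b" are illustrative only, and error handling is collapsed into panics.

    package main

    import (
        "fmt"
        "strings"

        "github.com/nihei9/vartan/driver"
        "github.com/nihei9/vartan/grammar"
        "github.com/nihei9/vartan/spec"
    )

    func main() {
        // A toy spec in the same notation used by driver/parser_test.go.
        specSrc := `
    expr
        : expr "\+" id
        | id
        ;
    id: "[A-Za-z_][0-9A-Za-z_]*";
    `
        ast, err := spec.Parse(strings.NewReader(specSrc))
        if err != nil {
            panic(err)
        }
        g, err := grammar.NewGrammar(ast)
        if err != nil {
            panic(err)
        }
        gram, err := grammar.Compile(g)
        if err != nil {
            panic(err)
        }
        p, err := driver.NewParser(gram, strings.NewReader("a+b"))
        if err != nil {
            panic(err)
        }
        if err := p.Parse(); err != nil {
            panic(err)
        }
        // GetCST returns the root of the concrete syntax tree; for this
        // grammar its kind is the start symbol "expr".
        fmt.Println(p.GetCST().KindName)
    }
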
diff --git a/driver/parser.go b/driver/parser.go
new file mode 100644
index 0000000..12081bd
--- /dev/null
+++ b/driver/parser.go
@@ -0,0 +1,155 @@
+package driver
+
+import (
+ "fmt"
+ "io"
+
+ mldriver "github.com/nihei9/maleeni/driver"
+ "github.com/nihei9/vartan/spec"
+)
+
+type CST struct {
+ KindName string
+ Text string
+ Children []*CST
+}
+
+func printCST(cst *CST, depth int) {
+ for i := 0; i < depth; i++ {
+ fmt.Printf(" ")
+ }
+ fmt.Printf("%v", cst.KindName)
+ if cst.Text != "" {
+ fmt.Printf(` "%v"`, cst.Text)
+ }
+ fmt.Printf("\n")
+ for _, c := range cst.Children {
+ printCST(c, depth+1)
+ }
+}
+
+type semanticFrame struct {
+ cst *CST
+}
+
+type Parser struct {
+ gram *spec.CompiledGrammar
+ lex *mldriver.Lexer
+ stateStack []int
+ semStack []*semanticFrame
+ cst *CST
+}
+
+func NewParser(gram *spec.CompiledGrammar, src io.Reader) (*Parser, error) {
+ lex, err := mldriver.NewLexer(gram.LexicalSpecification.Maleeni.Spec, src)
+ if err != nil {
+ return nil, err
+ }
+
+ return &Parser{
+ gram: gram,
+ lex: lex,
+ stateStack: []int{},
+ semStack: []*semanticFrame{},
+ }, nil
+}
+
+func (p *Parser) Parse() error {
+ termCount := p.gram.ParsingTable.TerminalCount
+ p.push(p.gram.ParsingTable.InitialState)
+ tok, err := p.lex.Next()
+ if err != nil {
+ return err
+ }
+ if tok.Invalid {
+ return fmt.Errorf("invalid token: %+v", tok)
+ }
+ for {
+ var tsym int
+ if tok.EOF {
+ tsym = p.gram.ParsingTable.EOFSymbol
+ } else {
+ tsym = p.gram.LexicalSpecification.Maleeni.KindToTerminal[tok.Mode.Int()][tok.Kind]
+ }
+ act := p.gram.ParsingTable.Action[p.top()*termCount+tsym]
+ switch {
+ case act < 0: // Shift
+ tokText := tok.Text()
+ tok, err = p.shift(act * -1)
+ if err != nil {
+ return err
+ }
+ // semantic action
+ p.semStack = append(p.semStack, &semanticFrame{
+ cst: &CST{
+ KindName: p.gram.ParsingTable.Terminals[tsym],
+ Text: tokText,
+ },
+ })
+ case act > 0: // Reduce
+ accepted := p.reduce(act)
+ if accepted {
+ p.cst = p.semStack[len(p.semStack)-1].cst
+ return nil
+ }
+ // semantic action
+ prodNum := act
+ lhs := p.gram.ParsingTable.LHSSymbols[prodNum]
+ n := p.gram.ParsingTable.AlternativeSymbolCounts[prodNum]
+ children := []*CST{}
+ for _, f := range p.semStack[len(p.semStack)-n:] {
+ children = append(children, f.cst)
+ }
+ p.semStack = p.semStack[:len(p.semStack)-n]
+ p.semStack = append(p.semStack, &semanticFrame{
+ cst: &CST{
+ KindName: p.gram.ParsingTable.NonTerminals[lhs],
+ Children: children,
+ },
+ })
+ default:
+ return fmt.Errorf("unexpected token: %v", tok)
+ }
+ }
+}
+
+func (p *Parser) shift(nextState int) (*mldriver.Token, error) {
+ p.push(nextState)
+ tok, err := p.lex.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.Invalid {
+ return nil, fmt.Errorf("invalid token: %+v", tok)
+ }
+ return tok, nil
+}
+
+func (p *Parser) reduce(prodNum int) bool {
+ tab := p.gram.ParsingTable
+ lhs := tab.LHSSymbols[prodNum]
+ if lhs == tab.LHSSymbols[tab.StartProduction] {
+ return true
+ }
+ n := tab.AlternativeSymbolCounts[prodNum]
+ p.pop(n)
+ nextState := tab.GoTo[p.top()*tab.NonTerminalCount+lhs]
+ p.push(nextState)
+ return false
+}
+
+func (p *Parser) top() int {
+ return p.stateStack[len(p.stateStack)-1]
+}
+
+func (p *Parser) push(state int) {
+ p.stateStack = append(p.stateStack, state)
+}
+
+func (p *Parser) pop(n int) {
+ p.stateStack = p.stateStack[:len(p.stateStack)-n]
+}
+
+func (p *Parser) GetCST() *CST {
+ return p.cst
+}
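
A note on the table encoding Parse relies on: the action for the current state and lookahead terminal is looked up at Action[state*TerminalCount+terminal]; a negative entry means "shift and go to state -entry", a positive entry means "reduce by production number entry", and zero means no action is defined, i.e. a syntax error. The helper below only restates that convention as a sketch; decodeAction is a hypothetical name and is not part of this commit.

    // decodeAction restates the action encoding used by Parser.Parse above.
    // Illustration only.
    func decodeAction(tab *spec.ParsingTable, state, terminal int) (string, int) {
        act := tab.Action[state*tab.TerminalCount+terminal]
        switch {
        case act < 0:
            return "shift", -act // -act is the next state
        case act > 0:
            return "reduce", act // act is the production number
        default:
            return "error", 0 // no entry: unexpected token
        }
    }
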
diff --git a/driver/parser_test.go b/driver/parser_test.go
new file mode 100644
index 0000000..bc6a34e
--- /dev/null
+++ b/driver/parser_test.go
@@ -0,0 +1,50 @@
+package driver
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/nihei9/vartan/grammar"
+ "github.com/nihei9/vartan/spec"
+)
+
+func TestParser_Parse(t *testing.T) {
+ specSrc := `
+expr
+ : expr "\+" term
+ | term
+ ;
+term
+ : term "\*" factor
+ | factor
+ ;
+factor
+ : "\(" expr "\)"
+ | id
+ ;
+id: "[A-Za-z_][0-9A-Za-z_]*";
+`
+ ast, err := spec.Parse(strings.NewReader(specSrc))
+ if err != nil {
+ t.Fatal(err)
+ }
+ g, err := grammar.NewGrammar(ast)
+ if err != nil {
+ t.Fatal(err)
+ }
+ gram, err := grammar.Compile(g)
+ if err != nil {
+ t.Fatal(err)
+ }
+ src := `(a+(b+c))*d+e`
+ p, err := NewParser(gram, strings.NewReader(src))
+ if err != nil {
+ t.Fatal(err)
+ }
+ err = p.Parse()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ printCST(p.GetCST(), 0)
+}
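
For orientation: with the grammar above, a single identifier reduces through factor, term, and expr, so for an input like `a` the tree printed by printCST is just a chain, roughly (indentation width aside):

    expr
      term
        factor
          id "a"

For the test's input (a+(b+c))*d+e the tree is correspondingly deeper, with one internal node per applied production and one leaf per shifted token.
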
diff --git a/grammar/grammar.go b/grammar/grammar.go
index ce0ff19..2a7953a 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -3,6 +3,7 @@ package grammar
import (
"fmt"
+ mlcompiler "github.com/nihei9/maleeni/compiler"
mlspec "github.com/nihei9/maleeni/spec"
"github.com/nihei9/vartan/spec"
)
@@ -156,3 +157,101 @@ func isLexicalProduction(prod *spec.ProductionNode) bool {
}
return false
}
+
+func Compile(gram *Grammar) (*spec.CompiledGrammar, error) {
+ lexSpec, err := mlcompiler.Compile(gram.lexSpec, mlcompiler.CompressionLevel(mlcompiler.CompressionLevelMax))
+ if err != nil {
+ return nil, err
+ }
+
+ kind2Term := make([][]int, len(lexSpec.Modes))
+ for modeNum, spec := range lexSpec.Specs {
+ if modeNum == 0 {
+ kind2Term[0] = nil
+ continue
+ }
+ rec := make([]int, len(spec.Kinds))
+ for n, k := range spec.Kinds {
+ if n == 0 {
+ rec[0] = symbolNil.num().Int()
+ continue
+ }
+ sym, ok := gram.symbolTable.toSymbol(k.String())
+ if !ok {
+ return nil, fmt.Errorf("terminal symbol '%v' (in '%v' mode) is not found in a symbol table", k, lexSpec.Modes[modeNum])
+ }
+ rec[n] = sym.num().Int()
+ }
+ kind2Term[modeNum] = rec
+ }
+
+ terms, err := gram.symbolTable.getTerminalTexts()
+ if err != nil {
+ return nil, err
+ }
+
+ nonTerms, err := gram.symbolTable.getNonTerminalTexts()
+ if err != nil {
+ return nil, err
+ }
+
+ firstSet, err := genFirstSet(gram.productionSet)
+ if err != nil {
+ return nil, err
+ }
+
+ followSet, err := genFollowSet(gram.productionSet, firstSet)
+ if err != nil {
+ return nil, err
+ }
+
+ lr0, err := genLR0Automaton(gram.productionSet, gram.augmentedStartSymbol)
+ if err != nil {
+ return nil, err
+ }
+
+ tab, err := genSLRParsingTable(lr0, gram.productionSet, followSet, len(terms), len(nonTerms))
+ if err != nil {
+ return nil, err
+ }
+
+ action := make([]int, len(tab.actionTable))
+ for i, e := range tab.actionTable {
+ action[i] = int(e)
+ }
+ goTo := make([]int, len(tab.goToTable))
+ for i, e := range tab.goToTable {
+ goTo[i] = int(e)
+ }
+
+ lhsSyms := make([]int, len(gram.productionSet.getAllProductions())+1)
+ altSymCounts := make([]int, len(gram.productionSet.getAllProductions())+1)
+ for _, p := range gram.productionSet.getAllProductions() {
+ lhsSyms[p.num] = p.lhs.num().Int()
+ altSymCounts[p.num] = p.rhsLen
+ }
+
+ return &spec.CompiledGrammar{
+ LexicalSpecification: &spec.LexicalSpecification{
+ Lexer: "maleeni",
+ Maleeni: &spec.Maleeni{
+ Spec: lexSpec,
+ KindToTerminal: kind2Term,
+ },
+ },
+ ParsingTable: &spec.ParsingTable{
+ Action: action,
+ GoTo: goTo,
+ StateCount: tab.stateCount,
+ InitialState: tab.InitialState.Int(),
+ StartProduction: productionNumStart.Int(),
+ LHSSymbols: lhsSyms,
+ AlternativeSymbolCounts: altSymCounts,
+ Terminals: terms,
+ TerminalCount: tab.terminalCount,
+ NonTerminals: nonTerms,
+ NonTerminalCount: tab.nonTerminalCount,
+ EOFSymbol: symbolEOF.num().Int(),
+ },
+ }, nil
+}
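
Since Compile returns a plain data structure whose fields all carry JSON tags (see spec/grammar.go below), a compiled grammar can be written to disk and read back with encoding/json alone. A minimal sketch, assuming gram is the *spec.CompiledGrammar returned by Compile; the file name is illustrative, and whether the CLI persists grammars this way is outside the scope of this commit.

    // Requires "encoding/json" and "os". Sketch only.
    b, err := json.Marshal(gram)
    if err != nil {
        return err
    }
    if err := os.WriteFile("grammar.json", b, 0644); err != nil {
        return err
    }

    var loaded spec.CompiledGrammar
    if err := json.Unmarshal(b, &loaded); err != nil {
        return err
    }
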
diff --git a/spec/grammar.go b/spec/grammar.go
new file mode 100644
index 0000000..653b50d
--- /dev/null
+++ b/spec/grammar.go
@@ -0,0 +1,33 @@
+package spec
+
+import mlspec "github.com/nihei9/maleeni/spec"
+
+type CompiledGrammar struct {
+ LexicalSpecification *LexicalSpecification `json:"lexical_specification"`
+ ParsingTable *ParsingTable `json:"parsing_table"`
+}
+
+type LexicalSpecification struct {
+ Lexer string `json:"lexer"`
+ Maleeni *Maleeni `json:"maleeni"`
+}
+
+type Maleeni struct {
+ Spec *mlspec.CompiledLexSpec `json:"spec"`
+ KindToTerminal [][]int `json:"kind_to_terminal"`
+}
+
+type ParsingTable struct {
+ Action []int `json:"action"`
+ GoTo []int `json:"goto"`
+ StateCount int `json:"state_count"`
+ InitialState int `json:"initial_state"`
+ StartProduction int `json:"start_production"`
+ LHSSymbols []int `json:"lhs_symbols"`
+ AlternativeSymbolCounts []int `json:"alternative_symbol_counts"`
+ Terminals []string `json:"terminals"`
+ TerminalCount int `json:"terminal_count"`
+ NonTerminals []string `json:"non_terminals"`
+ NonTerminalCount int `json:"non_terminal_count"`
+ EOFSymbol int `json:"eof_symbol"`
+}