aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRyo Nihei <nihei.dev@gmail.com>2021-06-20 20:22:47 +0900
committerRyo Nihei <nihei.dev@gmail.com>2021-06-20 20:22:47 +0900
commit4bd40643a5edfe427de52f4b69a1e912d11b1fc2 (patch)
treeab6021bcd493a799baf5661bf5ab9fac84c4c003
parentAdd syntax of modifiers and actions (diff)
downloadurubu-4bd40643a5edfe427de52f4b69a1e912d11b1fc2.tar.gz
urubu-4bd40643a5edfe427de52f4b69a1e912d11b1fc2.tar.xz
Add skip action
-rw-r--r--driver/parser.go33
-rw-r--r--driver/parser_test.go12
-rw-r--r--grammar/grammar.go30
-rw-r--r--spec/grammar.go1
4 files changed, 60 insertions, 16 deletions
diff --git a/driver/parser.go b/driver/parser.go
index 12081bd..adc2a54 100644
--- a/driver/parser.go
+++ b/driver/parser.go
@@ -57,13 +57,10 @@ func NewParser(gram *spec.CompiledGrammar, src io.Reader) (*Parser, error) {
func (p *Parser) Parse() error {
termCount := p.gram.ParsingTable.TerminalCount
p.push(p.gram.ParsingTable.InitialState)
- tok, err := p.lex.Next()
+ tok, err := p.nextToken()
if err != nil {
return err
}
- if tok.Invalid {
- return fmt.Errorf("invalid token: %+v", tok)
- }
for {
var tsym int
if tok.EOF {
@@ -113,16 +110,28 @@ func (p *Parser) Parse() error {
}
}
+func (p *Parser) nextToken() (*mldriver.Token, error) {
+ skip := p.gram.LexicalSpecification.Maleeni.Skip
+ for {
+ tok, err := p.lex.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.Invalid {
+ return nil, fmt.Errorf("invalid token: %+v", tok)
+ }
+
+ if skip[tok.Mode.Int()][tok.Kind] > 0 {
+ continue
+ }
+
+ return tok, nil
+ }
+}
+
func (p *Parser) shift(nextState int) (*mldriver.Token, error) {
p.push(nextState)
- tok, err := p.lex.Next()
- if err != nil {
- return nil, err
- }
- if tok.Invalid {
- return nil, fmt.Errorf("invalid token: %+v", tok)
- }
- return tok, nil
+ return p.nextToken()
}
func (p *Parser) reduce(prodNum int) bool {
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 17fee21..75b25ae 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -64,6 +64,18 @@ bar: "bar";
`,
src: `foobar`,
},
+ // The parser can skip specified tokens.
+ {
+ specSrc: `
+s
+ : foo bar
+ ;
+foo: "foo";
+bar: "bar";
+white_space: "[\u{0009}\u{0020}]+" # skip;
+`,
+ src: `foo bar`,
+ },
}
for _, tt := range tests {
ast, err := spec.Parse(strings.NewReader(tt.specSrc))
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 4507ddd..e7271c0 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -10,6 +10,7 @@ import (
type Grammar struct {
lexSpec *mlspec.LexSpec
+ skipLexKinds []mlspec.LexKind
productionSet *productionSet
augmentedStartSymbol symbol
symbolTable *symbolTable
@@ -19,6 +20,7 @@ func NewGrammar(root *spec.RootNode) (*Grammar, error) {
symTab := newSymbolTable()
anonPat2Sym := map[string]symbol{}
var lexSpec *mlspec.LexSpec
+ var skip []mlspec.LexKind
{
entries := []*mlspec.LexEntry{}
anonPats := []string{}
@@ -51,6 +53,11 @@ func NewGrammar(root *spec.RootNode) (*Grammar, error) {
if alt.Action != nil {
act := alt.Action
switch act.Name {
+ case "skip":
+ if act.Parameter != "" {
+ return nil, fmt.Errorf("action 'skip' needs no parameter")
+ }
+ skip = append(skip, mlspec.LexKind(prod.LHS))
case "push":
if act.Parameter == "" {
return nil, fmt.Errorf("action 'push' needs a parameter")
@@ -186,6 +193,7 @@ func NewGrammar(root *spec.RootNode) (*Grammar, error) {
return &Grammar{
lexSpec: lexSpec,
+ skipLexKinds: skip,
productionSet: prods,
augmentedStartSymbol: augStartSym,
symbolTable: symTab,
@@ -206,24 +214,37 @@ func Compile(gram *Grammar) (*spec.CompiledGrammar, error) {
}
kind2Term := make([][]int, len(lexSpec.Modes))
+ skip := make([][]int, len(lexSpec.Modes))
for modeNum, spec := range lexSpec.Specs {
if modeNum == 0 {
kind2Term[0] = nil
+ skip[0] = nil
continue
}
- rec := make([]int, len(spec.Kinds))
+
+ k2tRec := make([]int, len(spec.Kinds))
+ skipRec := make([]int, len(spec.Kinds))
for n, k := range spec.Kinds {
if n == 0 {
- rec[0] = symbolNil.num().Int()
+ k2tRec[0] = symbolNil.num().Int()
continue
}
+
sym, ok := gram.symbolTable.toSymbol(k.String())
if !ok {
return nil, fmt.Errorf("terminal symbol '%v' (in '%v' mode) is not found in a symbol table", k, lexSpec.Modes[modeNum])
}
- rec[n] = sym.num().Int()
+ k2tRec[n] = sym.num().Int()
+
+ for _, sk := range gram.skipLexKinds {
+ if k != sk {
+ continue
+ }
+ skipRec[n] = 1
+ }
}
- kind2Term[modeNum] = rec
+ kind2Term[modeNum] = k2tRec
+ skip[modeNum] = skipRec
}
terms, err := gram.symbolTable.getTerminalTexts()
@@ -278,6 +299,7 @@ func Compile(gram *Grammar) (*spec.CompiledGrammar, error) {
Maleeni: &spec.Maleeni{
Spec: lexSpec,
KindToTerminal: kind2Term,
+ Skip: skip,
},
},
ParsingTable: &spec.ParsingTable{
diff --git a/spec/grammar.go b/spec/grammar.go
index 653b50d..44ef3ee 100644
--- a/spec/grammar.go
+++ b/spec/grammar.go
@@ -15,6 +15,7 @@ type LexicalSpecification struct {
type Maleeni struct {
Spec *mlspec.CompiledLexSpec `json:"spec"`
KindToTerminal [][]int `json:"kind_to_terminal"`
+ Skip [][]int `json:"skip"`
}
type ParsingTable struct {