diff options
-rw-r--r-- | driver/parser.go | 42 | ||||
-rw-r--r-- | grammar/grammar.go | 47 | ||||
-rw-r--r-- | spec/grammar.go | 1 | ||||
-rw-r--r-- | spec/lexer.go | 13 | ||||
-rw-r--r-- | spec/lexer_test.go | 6 | ||||
-rw-r--r-- | spec/parser.go | 27 |
6 files changed, 92 insertions, 44 deletions
diff --git a/driver/parser.go b/driver/parser.go index 08120da..c13413a 100644 --- a/driver/parser.go +++ b/driver/parser.go @@ -5,7 +5,6 @@ import ( "io" mldriver "github.com/nihei9/maleeni/driver" - mlspec "github.com/nihei9/maleeni/spec" "github.com/nihei9/vartan/spec" ) @@ -292,23 +291,12 @@ ACTION_LOOP: continue ACTION_LOOP } - var eKindNames []string - { - eKinds, eof := p.expectedKinds(p.top()) - for _, k := range eKinds { - eKindNames = append(eKindNames, k.String()) - } - if eof { - eKindNames = append(eKindNames, "<EOF>") - } - } - p.synErrs = append(p.synErrs, &SyntaxError{ Row: tok.Row, Col: tok.Col, Message: "unexpected token", Token: tok, - ExpectedTerminals: eKindNames, + ExpectedTerminals: p.expectedTerms(p.top()), }) for { @@ -418,26 +406,30 @@ func (p *Parser) SyntaxErrors() []*SyntaxError { return p.synErrs } -func (p *Parser) expectedKinds(state int) ([]mlspec.LexKindName, bool) { - kinds := []mlspec.LexKindName{} - eof := false +func (p *Parser) expectedTerms(state int) []string { + kinds := []string{} terms := p.gram.ParsingTable.ExpectedTerminals[state] + aliases := p.gram.LexicalSpecification.Maleeni.KindAliases for _, tsym := range terms { - if tsym == p.gram.ParsingTable.EOFSymbol { - eof = true - continue - } - // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol // intentionally. if tsym == p.gram.ParsingTable.ErrorSymbol { continue } - kindID := p.gram.LexicalSpecification.Maleeni.TerminalToKind[tsym] - kindName := p.gram.LexicalSpecification.Maleeni.Spec.KindNames[kindID] - kinds = append(kinds, kindName) + if tsym == p.gram.ParsingTable.EOFSymbol { + kinds = append(kinds, "<eof>") + continue + } + + if alias := aliases[tsym]; alias != "" { + kinds = append(kinds, alias) + } else { + term2Kind := p.gram.LexicalSpecification.Maleeni.TerminalToKind + kindNames := p.gram.LexicalSpecification.Maleeni.Spec.KindNames + kinds = append(kinds, kindNames[term2Kind[tsym]].String()) + } } - return kinds, eof + return kinds } diff --git a/grammar/grammar.go b/grammar/grammar.go index 27fafc9..b911ee2 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -73,6 +73,7 @@ const reservedSymbolNameError = "error" type Grammar struct { lexSpec *mlspec.LexSpec skipLexKinds []mlspec.LexKindName + kindAliases map[symbol]string sym2AnonPat map[symbol]string productionSet *productionSet augmentedStartSymbol symbol @@ -158,6 +159,7 @@ func (b *GrammarBuilder) Build() (*Grammar, error) { return &Grammar{ lexSpec: symTabAndLexSpec.lexSpec, skipLexKinds: symTabAndLexSpec.skip, + kindAliases: symTabAndLexSpec.aliases, sym2AnonPat: symTabAndLexSpec.sym2AnonPat, productionSet: prodsAndActs.prods, augmentedStartSymbol: prodsAndActs.augStartSym, @@ -269,6 +271,7 @@ type symbolTableAndLexSpec struct { errSym symbol skip []mlspec.LexKindName skipSyms []string + aliases map[symbol]string } func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolTableAndLexSpec, error) { @@ -332,6 +335,7 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT skipKinds := []mlspec.LexKindName{} skipSyms := []string{} + aliases := map[symbol]string{} for _, prod := range root.LexProductions { if sym, exist := symTab.toSymbol(prod.LHS); exist { if sym == errSym { @@ -352,12 +356,12 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT continue } - _, err := symTab.registerTerminalSymbol(prod.LHS) + lhsSym, err := symTab.registerTerminalSymbol(prod.LHS) if err != nil { return nil, err } - entry, skip, specErr, err := genLexEntry(prod) + entry, skip, alias, specErr, err := genLexEntry(prod) if err != nil { return nil, err } @@ -369,6 +373,9 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT skipKinds = append(skipKinds, mlspec.LexKindName(prod.LHS)) skipSyms = append(skipSyms, prod.LHS) } + if alias != "" { + aliases[lhsSym] = alias + } entries = append(entries, entry) } @@ -402,17 +409,18 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT errSym: errSym, skip: skipKinds, skipSyms: skipSyms, + aliases: aliases, }, nil } -func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecError, error) { +func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, string, *verr.SpecError, error) { var modes []mlspec.LexModeName if prod.Directive != nil { dir := prod.Directive switch dir.Name { case "mode": if len(dir.Parameters) == 0 { - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("'mode' directive needs an ID parameter"), Row: dir.Pos.Row, @@ -421,7 +429,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE } for _, param := range dir.Parameters { if param.ID == "" { - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("'mode' directive needs an ID parameter"), Row: param.Pos.Row, @@ -431,7 +439,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE modes = append(modes, mlspec.LexModeName(param.ID)) } default: - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidName, Detail: dir.Name, Row: dir.Pos.Row, @@ -444,12 +452,13 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE var skip bool var push mlspec.LexModeName var pop bool + var alias string if alt.Directive != nil { dir := alt.Directive switch dir.Name { case "skip": if len(dir.Parameters) > 0 { - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("'skip' directive needs no parameter"), Row: dir.Pos.Row, @@ -459,7 +468,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE skip = true case "push": if len(dir.Parameters) != 1 || dir.Parameters[0].ID == "" { - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("'push' directive needs an ID parameter"), Row: dir.Pos.Row, @@ -469,7 +478,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE push = mlspec.LexModeName(dir.Parameters[0].ID) case "pop": if len(dir.Parameters) > 0 { - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidParam, Detail: fmt.Sprintf("'pop' directive needs no parameter"), Row: dir.Pos.Row, @@ -477,8 +486,18 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE }, nil } pop = true + case "alias": + if len(dir.Parameters) != 1 || dir.Parameters[0].String == "" { + return nil, false, "", &verr.SpecError{ + Cause: semErrDirInvalidParam, + Detail: fmt.Sprintf("'alias' directive needs a string parameter"), + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + alias = dir.Parameters[0].String default: - return nil, false, &verr.SpecError{ + return nil, false, "", &verr.SpecError{ Cause: semErrDirInvalidName, Detail: dir.Name, Row: dir.Pos.Row, @@ -493,7 +512,7 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, *verr.SpecE Pattern: mlspec.LexPattern(alt.Elements[0].Pattern), Push: push, Pop: pop, - }, skip, nil, nil + }, skip, alias, nil, nil } type productionsAndActions struct { @@ -920,6 +939,11 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error return nil, err } + kindAliases := make([]string, gram.symbolTable.termNum.Int()) + for _, sym := range gram.symbolTable.terminalSymbols() { + kindAliases[sym.num().Int()] = gram.kindAliases[sym] + } + nonTerms, err := gram.symbolTable.nonTerminalTexts() if err != nil { return nil, err @@ -1033,6 +1057,7 @@ func Compile(gram *Grammar, opts ...CompileOption) (*spec.CompiledGrammar, error KindToTerminal: kind2Term, TerminalToKind: term2Kind, Skip: skip, + KindAliases: kindAliases, }, }, ParsingTable: &spec.ParsingTable{ diff --git a/spec/grammar.go b/spec/grammar.go index 7901957..42e6dc2 100644 --- a/spec/grammar.go +++ b/spec/grammar.go @@ -18,6 +18,7 @@ type Maleeni struct { KindToTerminal []int `json:"kind_to_terminal"` TerminalToKind []int `json:"terminal_to_kind"` Skip []int `json:"skip"` + KindAliases []string `json:"kind_aliases"` } type ParsingTable struct { diff --git a/spec/lexer.go b/spec/lexer.go index 258faae..d513d7d 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -21,11 +21,12 @@ const ( tokenKindKWFragment = tokenKind("fragment") tokenKindID = tokenKind("id") tokenKindTerminalPattern = tokenKind("terminal pattern") + tokenKindStringLiteral = tokenKind("string") tokenKindColon = tokenKind(":") tokenKindOr = tokenKind("|") tokenKindSemicolon = tokenKind(";") tokenKindDirectiveMarker = tokenKind("#") - tokenKindTreeNodeOpen = tokenKind("'(") + tokenKindTreeNodeOpen = tokenKind("#(") tokenKindTreeNodeClose = tokenKind(")") tokenKindPosition = tokenKind("$") tokenKindExpantion = tokenKind("...") @@ -77,6 +78,14 @@ func newTerminalPatternToken(text string, pos Position) *token { } } +func newStringLiteralToken(text string, pos Position) *token { + return &token{ + kind: tokenKindStringLiteral, + text: text, + pos: pos, + } +} + func newPositionToken(num int, pos Position) *token { return &token{ kind: tokenKindPosition, @@ -234,7 +243,7 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { Col: tok.Col + 1, } } - return newTerminalPatternToken(mlspec.EscapePattern(pat), newPosition(tok.Row+1, tok.Col+1)), nil + return newStringLiteralToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil case "colon": return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil case "or": diff --git a/spec/lexer_test.go b/spec/lexer_test.go index 51e5f59..c3540f6 100644 --- a/spec/lexer_test.go +++ b/spec/lexer_test.go @@ -16,6 +16,10 @@ func TestLexer_Run(t *testing.T) { return newTerminalPatternToken(text, newPosition(1, 0)) } + strTok := func(text string) *token { + return newStringLiteralToken(text, newPosition(1, 0)) + } + symTok := func(kind tokenKind) *token { return newSymbolToken(kind, newPosition(1, 0)) } @@ -40,7 +44,7 @@ func TestLexer_Run(t *testing.T) { tokens: []*token{ idTok("id"), termPatTok("terminal"), - termPatTok(`\.\*\+\?\|\(\)\[\\`), + strTok(`.*+?|()[\`), symTok(tokenKindColon), symTok(tokenKindOr), symTok(tokenKindSemicolon), diff --git a/spec/parser.go b/spec/parser.go index 53bf491..9778521 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -4,6 +4,7 @@ import ( "fmt" "io" + mlspec "github.com/nihei9/maleeni/spec" verr "github.com/nihei9/vartan/error" ) @@ -47,9 +48,10 @@ type DirectiveNode struct { } type ParameterNode struct { - ID string - Tree *TreeStructNode - Pos Position + ID string + String string + Tree *TreeStructNode + Pos Position } type TreeStructNode struct { @@ -262,10 +264,15 @@ func (p *parser) parseFragment() *FragmentNode { raiseSyntaxError(p.pos.Row, synErrNoColon) } - if !p.consume(tokenKindTerminalPattern) { + var rhs string + switch { + case p.consume(tokenKindTerminalPattern): + rhs = p.lastTok.text + case p.consume(tokenKindStringLiteral): + rhs = mlspec.EscapePattern(p.lastTok.text) + default: raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern) } - rhs := p.lastTok.text p.consume(tokenKindNewline) @@ -398,6 +405,11 @@ func (p *parser) parseElement() *ElementNode { Pattern: p.lastTok.text, Pos: p.lastTok.pos, } + case p.consume(tokenKindStringLiteral): + return &ElementNode{ + Pattern: mlspec.EscapePattern(p.lastTok.text), + Pos: p.lastTok.pos, + } } return nil } @@ -436,6 +448,11 @@ func (p *parser) parseParameter() *ParameterNode { ID: p.lastTok.text, Pos: p.lastTok.pos, } + case p.consume(tokenKindStringLiteral): + return &ParameterNode{ + String: p.lastTok.text, + Pos: p.lastTok.pos, + } case p.consume(tokenKindTreeNodeOpen): if !p.consume(tokenKindID) { raiseSyntaxError(p.pos.Row, synErrTreeInvalidFirstElem) |