aboutsummaryrefslogtreecommitdiff
path: root/src/urubu/spec
diff options
context:
space:
mode:
Diffstat (limited to 'src/urubu/spec')
-rw-r--r--src/urubu/spec/grammar.go (renamed from src/urubu/spec/grammar/grammar.go)93
-rw-r--r--src/urubu/spec/grammar/clexspec.json (renamed from src/urubu/spec/grammar/parser/clexspec.json)0
-rw-r--r--src/urubu/spec/grammar/description.go71
-rw-r--r--src/urubu/spec/grammar/lexspec.json (renamed from src/urubu/spec/grammar/parser/lexspec.json)0
-rw-r--r--src/urubu/spec/grammar/parser.go (renamed from src/urubu/spec/grammar/parser/vartan_lexer.go)911
-rw-r--r--src/urubu/spec/grammar/parser/lexer.go297
-rw-r--r--src/urubu/spec/grammar/parser/parser.go582
-rw-r--r--src/urubu/spec/grammar/parser/syntax_error.go45
-rw-r--r--src/urubu/spec/grammar/util.go21
-rw-r--r--src/urubu/spec/test.go (renamed from src/urubu/spec/test/tree_lexer.go)1402
-rw-r--r--src/urubu/spec/test/parser.go336
-rw-r--r--src/urubu/spec/test/tree_parser.go716
-rw-r--r--src/urubu/spec/test/tree_semantic_action.go367
-rw-r--r--src/urubu/spec/tree-report.json (renamed from src/urubu/spec/test/tree-report.json)0
-rw-r--r--src/urubu/spec/tree.json (renamed from src/urubu/spec/test/tree.json)0
-rw-r--r--src/urubu/spec/tree.vartan (renamed from src/urubu/spec/test/tree.vartan)0
16 files changed, 2403 insertions, 2438 deletions
diff --git a/src/urubu/spec/grammar/grammar.go b/src/urubu/spec/grammar.go
index bf1ea89..c2708d8 100644
--- a/src/urubu/spec/grammar/grammar.go
+++ b/src/urubu/spec/grammar.go
@@ -1,6 +1,79 @@
package grammar
-import "strconv"
+import (
+ "strconv"
+ "strings"
+)
+
+type Terminal struct {
+ Number int `json:"number"`
+ Name string `json:"name"`
+ Pattern string `json:"pattern"`
+ Precedence int `json:"prec"`
+ Associativity string `json:"assoc"`
+}
+
+type NonTerminal struct {
+ Number int `json:"number"`
+ Name string `json:"name"`
+}
+
+type Production struct {
+ Number int `json:"number"`
+ LHS int `json:"lhs"`
+ RHS []int `json:"rhs"`
+ Precedence int `json:"prec"`
+ Associativity string `json:"assoc"`
+}
+
+type Item struct {
+ Production int `json:"production"`
+ Dot int `json:"dot"`
+}
+
+type Transition struct {
+ Symbol int `json:"symbol"`
+ State int `json:"state"`
+}
+
+type Reduce struct {
+ LookAhead []int `json:"look_ahead"`
+ Production int `json:"production"`
+}
+
+type SRConflict struct {
+ Symbol int `json:"symbol"`
+ State int `json:"state"`
+ Production int `json:"production"`
+ AdoptedState *int `json:"adopted_state"`
+ AdoptedProduction *int `json:"adopted_production"`
+ ResolvedBy int `json:"resolved_by"`
+}
+
+type RRConflict struct {
+ Symbol int `json:"symbol"`
+ Production1 int `json:"production_1"`
+ Production2 int `json:"production_2"`
+ AdoptedProduction int `json:"adopted_production"`
+ ResolvedBy int `json:"resolved_by"`
+}
+
+type State struct {
+ Number int `json:"number"`
+ Kernel []*Item `json:"kernel"`
+ Shift []*Transition `json:"shift"`
+ Reduce []*Reduce `json:"reduce"`
+ GoTo []*Transition `json:"goto"`
+ SRConflict []*SRConflict `json:"sr_conflict"`
+ RRConflict []*RRConflict `json:"rr_conflict"`
+}
+
+type Report struct {
+ Terminals []*Terminal `json:"terminals"`
+ NonTerminals []*NonTerminal `json:"non_terminals"`
+ Productions []*Production `json:"productions"`
+ States []*State `json:"states"`
+}
type CompiledGrammar struct {
Name string `json:"name"`
@@ -158,3 +231,21 @@ type SyntacticSpec struct {
type ASTAction struct {
Entries [][]int `json:"entries"`
}
+
+var rep = strings.NewReplacer(
+ `.`, `\.`,
+ `*`, `\*`,
+ `+`, `\+`,
+ `?`, `\?`,
+ `|`, `\|`,
+ `(`, `\(`,
+ `)`, `\)`,
+ `[`, `\[`,
+ `\`, `\\`,
+)
+
+// EscapePattern escapes the special characters.
+// For example, EscapePattern(`+`) returns `\+`.
+func EscapePattern(s string) string {
+ return rep.Replace(s)
+}
diff --git a/src/urubu/spec/grammar/parser/clexspec.json b/src/urubu/spec/grammar/clexspec.json
index d0ed3d3..d0ed3d3 100644
--- a/src/urubu/spec/grammar/parser/clexspec.json
+++ b/src/urubu/spec/grammar/clexspec.json
diff --git a/src/urubu/spec/grammar/description.go b/src/urubu/spec/grammar/description.go
deleted file mode 100644
index 0d2a0b7..0000000
--- a/src/urubu/spec/grammar/description.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package grammar
-
-type Terminal struct {
- Number int `json:"number"`
- Name string `json:"name"`
- Pattern string `json:"pattern"`
- Precedence int `json:"prec"`
- Associativity string `json:"assoc"`
-}
-
-type NonTerminal struct {
- Number int `json:"number"`
- Name string `json:"name"`
-}
-
-type Production struct {
- Number int `json:"number"`
- LHS int `json:"lhs"`
- RHS []int `json:"rhs"`
- Precedence int `json:"prec"`
- Associativity string `json:"assoc"`
-}
-
-type Item struct {
- Production int `json:"production"`
- Dot int `json:"dot"`
-}
-
-type Transition struct {
- Symbol int `json:"symbol"`
- State int `json:"state"`
-}
-
-type Reduce struct {
- LookAhead []int `json:"look_ahead"`
- Production int `json:"production"`
-}
-
-type SRConflict struct {
- Symbol int `json:"symbol"`
- State int `json:"state"`
- Production int `json:"production"`
- AdoptedState *int `json:"adopted_state"`
- AdoptedProduction *int `json:"adopted_production"`
- ResolvedBy int `json:"resolved_by"`
-}
-
-type RRConflict struct {
- Symbol int `json:"symbol"`
- Production1 int `json:"production_1"`
- Production2 int `json:"production_2"`
- AdoptedProduction int `json:"adopted_production"`
- ResolvedBy int `json:"resolved_by"`
-}
-
-type State struct {
- Number int `json:"number"`
- Kernel []*Item `json:"kernel"`
- Shift []*Transition `json:"shift"`
- Reduce []*Reduce `json:"reduce"`
- GoTo []*Transition `json:"goto"`
- SRConflict []*SRConflict `json:"sr_conflict"`
- RRConflict []*RRConflict `json:"rr_conflict"`
-}
-
-type Report struct {
- Terminals []*Terminal `json:"terminals"`
- NonTerminals []*NonTerminal `json:"non_terminals"`
- Productions []*Production `json:"productions"`
- States []*State `json:"states"`
-}
diff --git a/src/urubu/spec/grammar/parser/lexspec.json b/src/urubu/spec/grammar/lexspec.json
index caf1f0e..caf1f0e 100644
--- a/src/urubu/spec/grammar/parser/lexspec.json
+++ b/src/urubu/spec/grammar/lexspec.json
diff --git a/src/urubu/spec/grammar/parser/vartan_lexer.go b/src/urubu/spec/grammar/parser.go
index 76ddfde..0e5a16b 100644
--- a/src/urubu/spec/grammar/parser/vartan_lexer.go
+++ b/src/urubu/spec/grammar/parser.go
@@ -1,11 +1,920 @@
-// Code generated by maleeni-go. DO NOT EDIT.
+//go:generate maleeni compile lexspec.json -o clexspec.json
+//go:generate maleeni-go clexspec.json --package parser
+
package parser
import (
+ _ "embed"
"fmt"
"io"
"io/ioutil"
+ "regexp"
+ "strings"
+
+ verr "urubu/error"
+ spec "urubu/spec/grammar"
+)
+
+type tokenKind string
+
+const (
+ tokenKindKWFragment = tokenKind("fragment")
+ tokenKindID = tokenKind("id")
+ tokenKindTerminalPattern = tokenKind("terminal pattern")
+ tokenKindStringLiteral = tokenKind("string")
+ tokenKindColon = tokenKind(":")
+ tokenKindOr = tokenKind("|")
+ tokenKindSemicolon = tokenKind(";")
+ tokenKindLabelMarker = tokenKind("@")
+ tokenKindDirectiveMarker = tokenKind("#")
+ tokenKindExpantion = tokenKind("...")
+ tokenKindOrderedSymbolMarker = tokenKind("$")
+ tokenKindLParen = tokenKind("(")
+ tokenKindRParen = tokenKind(")")
+ tokenKindNewline = tokenKind("newline")
+ tokenKindEOF = tokenKind("eof")
+ tokenKindInvalid = tokenKind("invalid")
+)
+
+var (
+ reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`)
+ reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`)
+)
+
+type Position struct {
+ Row int
+ Col int
+}
+
+func newPosition(row, col int) Position {
+ return Position{
+ Row: row,
+ Col: col,
+ }
+}
+
+type token struct {
+ kind tokenKind
+ text string
+ pos Position
+}
+
+func newSymbolToken(kind tokenKind, pos Position) *token {
+ return &token{
+ kind: kind,
+ pos: pos,
+ }
+}
+
+func newIDToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindID,
+ text: text,
+ pos: pos,
+ }
+}
+
+func newTerminalPatternToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindTerminalPattern,
+ text: text,
+ pos: pos,
+ }
+}
+
+func newStringLiteralToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindStringLiteral,
+ text: text,
+ pos: pos,
+ }
+}
+
+func newEOFToken() *token {
+ return &token{
+ kind: tokenKindEOF,
+ }
+}
+
+func newInvalidToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindInvalid,
+ text: text,
+ pos: pos,
+ }
+}
+
+type lexer struct {
+ d *Lexer
+ buf *token
+}
+
+func newLexer(src io.Reader) (*lexer, error) {
+ d, err := NewLexer(NewLexSpec(), src)
+ if err != nil {
+ return nil, err
+ }
+ return &lexer{
+ d: d,
+ }, nil
+}
+
+func (l *lexer) next() (*token, error) {
+ if l.buf != nil {
+ tok := l.buf
+ l.buf = nil
+ return tok, nil
+ }
+
+ var newline *token
+ for {
+ tok, err := l.lexAndSkipWSs()
+ if err != nil {
+ return nil, err
+ }
+ if tok.kind == tokenKindNewline {
+ newline = tok
+ continue
+ }
+
+ if newline != nil {
+ l.buf = tok
+ return newline, nil
+ }
+ return tok, nil
+ }
+}
+
+func (l *lexer) lexAndSkipWSs() (*token, error) {
+ var tok *Token
+ for {
+ var err error
+ tok, err = l.d.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.Invalid {
+ return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+ if tok.EOF {
+ return newEOFToken(), nil
+ }
+ switch tok.KindID {
+ case KindIDWhiteSpace:
+ continue
+ case KindIDLineComment:
+ continue
+ }
+
+ break
+ }
+
+ switch tok.KindID {
+ case KindIDNewline:
+ return newSymbolToken(tokenKindNewline, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDKwFragment:
+ return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDIdentifier:
+ if !reIDChar.Match(tok.Lexeme) {
+ return nil, &verr.SpecError{
+ Cause: synErrIDInvalidChar,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") {
+ return nil, &verr.SpecError{
+ Cause: synErrIDInvalidUnderscorePos,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ if strings.Contains(string(tok.Lexeme), "__") {
+ return nil, &verr.SpecError{
+ Cause: synErrIDConsecutiveUnderscores,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ if reIDInvalidDigitsPos.Match(tok.Lexeme) {
+ return nil, &verr.SpecError{
+ Cause: synErrIDInvalidDigitsPos,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ return newIDToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDTerminalOpen:
+ var b strings.Builder
+ for {
+ tok, err := l.d.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.EOF {
+ return nil, &verr.SpecError{
+ Cause: synErrUnclosedTerminal,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ switch tok.KindID {
+ case KindIDPattern:
+ // The escape sequences in a pattern string are interpreted by the lexer, except for the \".
+ // We must interpret the \" before passing them to the lexer because they are delimiters for
+ // the pattern strings.
+ fmt.Fprint(&b, strings.ReplaceAll(string(tok.Lexeme), `\"`, `"`))
+ case KindIDEscapeSymbol:
+ return nil, &verr.SpecError{
+ Cause: synErrIncompletedEscSeq,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ case KindIDTerminalClose:
+ pat := b.String()
+ if pat == "" {
+ return nil, &verr.SpecError{
+ Cause: synErrEmptyPattern,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ return newTerminalPatternToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+ }
+ case KindIDStringLiteralOpen:
+ var b strings.Builder
+ for {
+ tok, err := l.d.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.EOF {
+ return nil, &verr.SpecError{
+ Cause: synErrUnclosedString,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ switch tok.KindID {
+ case KindIDCharSeq:
+ fmt.Fprint(&b, string(tok.Lexeme))
+ case KindIDStringLiteralClose:
+ str := b.String()
+ if str == "" {
+ return nil, &verr.SpecError{
+ Cause: synErrEmptyString,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ return newStringLiteralToken(str, newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+ }
+ case KindIDColon:
+ return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDOr:
+ return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDSemicolon:
+ return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDLabelMarker:
+ return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDDirectiveMarker:
+ return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDExpansion:
+ return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDOrderedSymbolMarker:
+ return newSymbolToken(tokenKindOrderedSymbolMarker, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDLParen:
+ return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDRParen:
+ return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil
+ default:
+ return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+}
+
+type RootNode struct {
+ Directives []*DirectiveNode
+ Productions []*ProductionNode
+ LexProductions []*ProductionNode
+ Fragments []*FragmentNode
+}
+
+type ProductionNode struct {
+ Directives []*DirectiveNode
+ LHS string
+ RHS []*AlternativeNode
+ Pos Position
+}
+
+func (n *ProductionNode) isLexical() bool {
+ if len(n.RHS) == 1 && len(n.RHS[0].Elements) == 1 && n.RHS[0].Elements[0].Pattern != "" {
+ return true
+ }
+ return false
+}
+
+type AlternativeNode struct {
+ Elements []*ElementNode
+ Directives []*DirectiveNode
+ Pos Position
+}
+
+type ElementNode struct {
+ ID string
+ Pattern string
+ Label *LabelNode
+ Literally bool
+ Pos Position
+}
+
+type LabelNode struct {
+ Name string
+ Pos Position
+}
+
+type DirectiveNode struct {
+ Name string
+ Parameters []*ParameterNode
+ Pos Position
+}
+
+type ParameterNode struct {
+ ID string
+ Pattern string
+ String string
+ OrderedSymbol string
+ Group []*DirectiveNode
+ Expansion bool
+ Pos Position
+}
+
+type FragmentNode struct {
+ LHS string
+ RHS string
+ Pos Position
+}
+
+func raiseSyntaxError(row int, synErr *SyntaxError) {
+ panic(&verr.SpecError{
+ Cause: synErr,
+ Row: row,
+ })
+}
+
+func raiseSyntaxErrorWithDetail(row int, synErr *SyntaxError, detail string) {
+ panic(&verr.SpecError{
+ Cause: synErr,
+ Detail: detail,
+ Row: row,
+ })
+}
+
+func Parse(src io.Reader) (*RootNode, error) {
+ p, err := newParser(src)
+ if err != nil {
+ return nil, err
+ }
+
+ return p.parse()
+}
+
+type parser struct {
+ lex *lexer
+ peekedTok *token
+ lastTok *token
+ errs verr.SpecErrors
+
+ // A token position that the parser read at last.
+ // It is used as additional information in error messages.
+ pos Position
+}
+
+func newParser(src io.Reader) (*parser, error) {
+ lex, err := newLexer(src)
+ if err != nil {
+ return nil, err
+ }
+ return &parser{
+ lex: lex,
+ }, nil
+}
+
+func (p *parser) parse() (root *RootNode, retErr error) {
+ root = p.parseRoot()
+ if len(p.errs) > 0 {
+ return nil, p.errs
+ }
+
+ return root, nil
+}
+
+func (p *parser) parseRoot() *RootNode {
+ defer func() {
+ err := recover()
+ if err != nil {
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(fmt.Errorf("an unexpected error occurred: %v", err))
+ }
+ p.errs = append(p.errs, specErr)
+ }
+ }()
+
+ var dirs []*DirectiveNode
+ var prods []*ProductionNode
+ var lexProds []*ProductionNode
+ var fragments []*FragmentNode
+ for {
+ dir := p.parseTopLevelDirective()
+ if dir != nil {
+ dirs = append(dirs, dir)
+ continue
+ }
+
+ fragment := p.parseFragment()
+ if fragment != nil {
+ fragments = append(fragments, fragment)
+ continue
+ }
+
+ prod := p.parseProduction()
+ if prod != nil {
+ if prod.isLexical() {
+ lexProds = append(lexProds, prod)
+ } else {
+ prods = append(prods, prod)
+ }
+ continue
+ }
+
+ if p.consume(tokenKindEOF) {
+ break
+ }
+ }
+
+ return &RootNode{
+ Directives: dirs,
+ Productions: prods,
+ LexProductions: lexProds,
+ Fragments: fragments,
+ }
+}
+
+func (p *parser) parseTopLevelDirective() *DirectiveNode {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(err)
+ }
+
+ p.errs = append(p.errs, specErr)
+ p.skipOverTo(tokenKindSemicolon)
+ }()
+
+ dir := p.parseDirective()
+ if dir == nil {
+ return nil
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindSemicolon) {
+ raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon)
+ }
+
+ return dir
+}
+
+func (p *parser) parseFragment() *FragmentNode {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(err)
+ }
+
+ p.errs = append(p.errs, specErr)
+ p.skipOverTo(tokenKindSemicolon)
+ }()
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindKWFragment) {
+ return nil
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoProductionName)
+ }
+ lhs := p.lastTok.text
+ lhsPos := p.lastTok.pos
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindColon) {
+ raiseSyntaxError(p.pos.Row, synErrNoColon)
+ }
+
+ var rhs string
+ switch {
+ case p.consume(tokenKindTerminalPattern):
+ rhs = p.lastTok.text
+ case p.consume(tokenKindStringLiteral):
+ rhs = spec.EscapePattern(p.lastTok.text)
+ default:
+ raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern)
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindSemicolon) {
+ raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
+ }
+
+ if !p.consume(tokenKindNewline) {
+ if !p.consume(tokenKindEOF) {
+ raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
+ }
+ }
+
+ return &FragmentNode{
+ LHS: lhs,
+ RHS: rhs,
+ Pos: lhsPos,
+ }
+}
+
+func (p *parser) parseProduction() *ProductionNode {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(err)
+ }
+
+ p.errs = append(p.errs, specErr)
+ p.skipOverTo(tokenKindSemicolon)
+ }()
+
+ p.consume(tokenKindNewline)
+
+ if p.consume(tokenKindEOF) {
+ return nil
+ }
+
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoProductionName)
+ }
+ lhs := p.lastTok.text
+ lhsPos := p.lastTok.pos
+
+ var dirs []*DirectiveNode
+ for {
+ dir := p.parseDirective()
+ if dir == nil {
+ break
+ }
+ dirs = append(dirs, dir)
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindColon) {
+ raiseSyntaxError(p.pos.Row, synErrNoColon)
+ }
+
+ alt := p.parseAlternative()
+ rhs := []*AlternativeNode{alt}
+ for {
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindOr) {
+ break
+ }
+ alt := p.parseAlternative()
+ rhs = append(rhs, alt)
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindSemicolon) {
+ raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
+ }
+
+ if !p.consume(tokenKindNewline) {
+ if !p.consume(tokenKindEOF) {
+ raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
+ }
+ }
+
+ prod := &ProductionNode{
+ Directives: dirs,
+ LHS: lhs,
+ RHS: rhs,
+ Pos: lhsPos,
+ }
+
+ // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs.
+ // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate
+ // name to the pattern. Therefore, this code prohibits alternatives from containing patterns.
+ if !prod.isLexical() {
+ for _, alt := range prod.RHS {
+ for _, elem := range alt.Elements {
+ if elem.Pattern != "" {
+ raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt)
+ }
+ }
+ }
+ }
+
+ return prod
+}
+
+func (p *parser) parseAlternative() *AlternativeNode {
+ elems := []*ElementNode{}
+ for {
+ elem := p.parseElement()
+ if elem == nil {
+ break
+ }
+ elems = append(elems, elem)
+ }
+
+ // When the length of an alternative is zero, we cannot set a position.
+ var firstElemPos Position
+ if len(elems) > 0 {
+ firstElemPos = elems[0].Pos
+ }
+
+ var dirs []*DirectiveNode
+ for {
+ dir := p.parseDirective()
+ if dir == nil {
+ break
+ }
+ dirs = append(dirs, dir)
+ }
+
+ return &AlternativeNode{
+ Elements: elems,
+ Directives: dirs,
+ Pos: firstElemPos,
+ }
+}
+
+func (p *parser) parseElement() *ElementNode {
+ var elem *ElementNode
+ switch {
+ case p.consume(tokenKindID):
+ elem = &ElementNode{
+ ID: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindTerminalPattern):
+ elem = &ElementNode{
+ Pattern: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindStringLiteral):
+ elem = &ElementNode{
+ Pattern: p.lastTok.text,
+ Literally: true,
+ Pos: p.lastTok.pos,
+ }
+ default:
+ if p.consume(tokenKindLabelMarker) {
+ raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol)
+ }
+ return nil
+ }
+ if p.consume(tokenKindLabelMarker) {
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoLabel)
+ }
+ elem.Label = &LabelNode{
+ Name: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ }
+ return elem
+}
+
+func (p *parser) parseDirective() *DirectiveNode {
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindDirectiveMarker) {
+ return nil
+ }
+ dirPos := p.lastTok.pos
+
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoDirectiveName)
+ }
+ name := p.lastTok.text
+
+ var params []*ParameterNode
+ for {
+ param := p.parseParameter()
+ if param == nil {
+ break
+ }
+ params = append(params, param)
+ }
+
+ return &DirectiveNode{
+ Name: name,
+ Parameters: params,
+ Pos: dirPos,
+ }
+}
+
+func (p *parser) parseParameter() *ParameterNode {
+ var param *ParameterNode
+ switch {
+ case p.consume(tokenKindID):
+ param = &ParameterNode{
+ ID: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindTerminalPattern):
+ param = &ParameterNode{
+ Pattern: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindStringLiteral):
+ param = &ParameterNode{
+ String: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindOrderedSymbolMarker):
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoOrderedSymbolName)
+ }
+ param = &ParameterNode{
+ OrderedSymbol: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindLParen):
+ pos := p.lastTok.pos
+ var g []*DirectiveNode
+ for {
+ dir := p.parseDirective()
+ if dir == nil {
+ break
+ }
+ g = append(g, dir)
+ }
+ if !p.consume(tokenKindRParen) {
+ raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup)
+ }
+ if len(g) == 0 {
+ // Set an empty slice representing an empty directive group to distinguish between the following two cases.
+ //
+ // - #prec (); // vartan allows this case.
+ // - #prec; // This case will raise an error.
+ g = []*DirectiveNode{}
+ }
+ param = &ParameterNode{
+ Group: g,
+ Pos: pos,
+ }
+ }
+ if p.consume(tokenKindExpantion) {
+ switch {
+ case param == nil:
+ raiseSyntaxError(p.pos.Row, synErrStrayExpOp)
+ case param.ID == "":
+ raiseSyntaxError(p.pos.Row, synErrInvalidExpOperand)
+ }
+ param.Expansion = true
+ }
+ return param
+}
+
+func (p *parser) consume(expected tokenKind) bool {
+ var tok *token
+ var err error
+ if p.peekedTok != nil {
+ tok = p.peekedTok
+ p.peekedTok = nil
+ } else {
+ tok, err = p.lex.next()
+ if err != nil {
+ panic(err)
+ }
+ }
+ p.pos = tok.pos
+ if tok.kind == tokenKindInvalid {
+ raiseSyntaxErrorWithDetail(p.pos.Row, synErrInvalidToken, tok.text)
+ }
+ if tok.kind == expected {
+ p.lastTok = tok
+ return true
+ }
+ p.peekedTok = tok
+
+ return false
+}
+
+func (p *parser) skip() {
+ var tok *token
+ var err error
+ for {
+ if p.peekedTok != nil {
+ tok = p.peekedTok
+ p.peekedTok = nil
+ } else {
+ tok, err = p.lex.next()
+ if err != nil {
+ p.errs = append(p.errs, &verr.SpecError{
+ Cause: err,
+ Row: p.pos.Row,
+ })
+ continue
+ }
+ }
+
+ break
+ }
+
+ p.lastTok = tok
+ p.pos = tok.pos
+}
+
+func (p *parser) skipOverTo(kind tokenKind) {
+ for {
+ if p.consume(kind) || p.consume(tokenKindEOF) {
+ return
+ }
+ p.skip()
+ }
+}
+
+type SyntaxError struct {
+ message string
+}
+
+func newSyntaxError(message string) *SyntaxError {
+ return &SyntaxError{
+ message: message,
+ }
+}
+
+func (e *SyntaxError) Error() string {
+ return e.message
+}
+
+var (
+ // lexical errors
+ synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore")
+ synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier")
+ synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively")
+ synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the beginning of an identifier")
+ synErrUnclosedTerminal = newSyntaxError("unclosed terminal")
+ synErrUnclosedString = newSyntaxError("unclosed string")
+ synErrIncompletedEscSeq = newSyntaxError("incomplete escape sequence; unexpected EOF following a backslash")
+ synErrEmptyPattern = newSyntaxError("a pattern must include at least one character")
+ synErrEmptyString = newSyntaxError("a string must include at least one character")
+
+ // syntax errors
+ synErrInvalidToken = newSyntaxError("invalid token")
+ synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;")
+ synErrNoProductionName = newSyntaxError("a production name is missing")
+ synErrNoColon = newSyntaxError("the colon must precede alternatives")
+ synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative")
+ synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol")
+ synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @")
+ synErrNoDirectiveName = newSyntaxError("a directive needs a name")
+ synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing")
+ synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )")
+ synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal")
+ synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier")
+ synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier")
+ synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline")
+ synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element")
)
+// Code generated by maleeni-go. DO NOT EDIT.
type ModeID int
diff --git a/src/urubu/spec/grammar/parser/lexer.go b/src/urubu/spec/grammar/parser/lexer.go
deleted file mode 100644
index bd8a24f..0000000
--- a/src/urubu/spec/grammar/parser/lexer.go
+++ /dev/null
@@ -1,297 +0,0 @@
-//go:generate maleeni compile lexspec.json -o clexspec.json
-//go:generate maleeni-go clexspec.json --package parser
-
-package parser
-
-import (
- _ "embed"
- "fmt"
- "io"
- "regexp"
- "strings"
-
- verr "urubu/error"
-)
-
-type tokenKind string
-
-const (
- tokenKindKWFragment = tokenKind("fragment")
- tokenKindID = tokenKind("id")
- tokenKindTerminalPattern = tokenKind("terminal pattern")
- tokenKindStringLiteral = tokenKind("string")
- tokenKindColon = tokenKind(":")
- tokenKindOr = tokenKind("|")
- tokenKindSemicolon = tokenKind(";")
- tokenKindLabelMarker = tokenKind("@")
- tokenKindDirectiveMarker = tokenKind("#")
- tokenKindExpantion = tokenKind("...")
- tokenKindOrderedSymbolMarker = tokenKind("$")
- tokenKindLParen = tokenKind("(")
- tokenKindRParen = tokenKind(")")
- tokenKindNewline = tokenKind("newline")
- tokenKindEOF = tokenKind("eof")
- tokenKindInvalid = tokenKind("invalid")
-)
-
-var (
- reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`)
- reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`)
-)
-
-type Position struct {
- Row int
- Col int
-}
-
-func newPosition(row, col int) Position {
- return Position{
- Row: row,
- Col: col,
- }
-}
-
-type token struct {
- kind tokenKind
- text string
- pos Position
-}
-
-func newSymbolToken(kind tokenKind, pos Position) *token {
- return &token{
- kind: kind,
- pos: pos,
- }
-}
-
-func newIDToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindID,
- text: text,
- pos: pos,
- }
-}
-
-func newTerminalPatternToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindTerminalPattern,
- text: text,
- pos: pos,
- }
-}
-
-func newStringLiteralToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindStringLiteral,
- text: text,
- pos: pos,
- }
-}
-
-func newEOFToken() *token {
- return &token{
- kind: tokenKindEOF,
- }
-}
-
-func newInvalidToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindInvalid,
- text: text,
- pos: pos,
- }
-}
-
-type lexer struct {
- d *Lexer
- buf *token
-}
-
-func newLexer(src io.Reader) (*lexer, error) {
- d, err := NewLexer(NewLexSpec(), src)
- if err != nil {
- return nil, err
- }
- return &lexer{
- d: d,
- }, nil
-}
-
-func (l *lexer) next() (*token, error) {
- if l.buf != nil {
- tok := l.buf
- l.buf = nil
- return tok, nil
- }
-
- var newline *token
- for {
- tok, err := l.lexAndSkipWSs()
- if err != nil {
- return nil, err
- }
- if tok.kind == tokenKindNewline {
- newline = tok
- continue
- }
-
- if newline != nil {
- l.buf = tok
- return newline, nil
- }
- return tok, nil
- }
-}
-
-func (l *lexer) lexAndSkipWSs() (*token, error) {
- var tok *Token
- for {
- var err error
- tok, err = l.d.Next()
- if err != nil {
- return nil, err
- }
- if tok.Invalid {
- return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
- }
- if tok.EOF {
- return newEOFToken(), nil
- }
- switch tok.KindID {
- case KindIDWhiteSpace:
- continue
- case KindIDLineComment:
- continue
- }
-
- break
- }
-
- switch tok.KindID {
- case KindIDNewline:
- return newSymbolToken(tokenKindNewline, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDKwFragment:
- return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDIdentifier:
- if !reIDChar.Match(tok.Lexeme) {
- return nil, &verr.SpecError{
- Cause: synErrIDInvalidChar,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") {
- return nil, &verr.SpecError{
- Cause: synErrIDInvalidUnderscorePos,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- if strings.Contains(string(tok.Lexeme), "__") {
- return nil, &verr.SpecError{
- Cause: synErrIDConsecutiveUnderscores,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- if reIDInvalidDigitsPos.Match(tok.Lexeme) {
- return nil, &verr.SpecError{
- Cause: synErrIDInvalidDigitsPos,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- return newIDToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDTerminalOpen:
- var b strings.Builder
- for {
- tok, err := l.d.Next()
- if err != nil {
- return nil, err
- }
- if tok.EOF {
- return nil, &verr.SpecError{
- Cause: synErrUnclosedTerminal,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- switch tok.KindID {
- case KindIDPattern:
- // The escape sequences in a pattern string are interpreted by the lexer, except for the \".
- // We must interpret the \" before passing them to the lexer because they are delimiters for
- // the pattern strings.
- fmt.Fprint(&b, strings.ReplaceAll(string(tok.Lexeme), `\"`, `"`))
- case KindIDEscapeSymbol:
- return nil, &verr.SpecError{
- Cause: synErrIncompletedEscSeq,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- case KindIDTerminalClose:
- pat := b.String()
- if pat == "" {
- return nil, &verr.SpecError{
- Cause: synErrEmptyPattern,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- return newTerminalPatternToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil
- }
- }
- case KindIDStringLiteralOpen:
- var b strings.Builder
- for {
- tok, err := l.d.Next()
- if err != nil {
- return nil, err
- }
- if tok.EOF {
- return nil, &verr.SpecError{
- Cause: synErrUnclosedString,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- switch tok.KindID {
- case KindIDCharSeq:
- fmt.Fprint(&b, string(tok.Lexeme))
- case KindIDStringLiteralClose:
- str := b.String()
- if str == "" {
- return nil, &verr.SpecError{
- Cause: synErrEmptyString,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- return newStringLiteralToken(str, newPosition(tok.Row+1, tok.Col+1)), nil
- }
- }
- case KindIDColon:
- return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDOr:
- return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDSemicolon:
- return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDLabelMarker:
- return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDDirectiveMarker:
- return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDExpansion:
- return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDOrderedSymbolMarker:
- return newSymbolToken(tokenKindOrderedSymbolMarker, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDLParen:
- return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDRParen:
- return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil
- default:
- return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
- }
-}
diff --git a/src/urubu/spec/grammar/parser/parser.go b/src/urubu/spec/grammar/parser/parser.go
deleted file mode 100644
index b604074..0000000
--- a/src/urubu/spec/grammar/parser/parser.go
+++ /dev/null
@@ -1,582 +0,0 @@
-package parser
-
-import (
- "fmt"
- "io"
-
- verr "urubu/error"
- spec "urubu/spec/grammar"
-)
-
-type RootNode struct {
- Directives []*DirectiveNode
- Productions []*ProductionNode
- LexProductions []*ProductionNode
- Fragments []*FragmentNode
-}
-
-type ProductionNode struct {
- Directives []*DirectiveNode
- LHS string
- RHS []*AlternativeNode
- Pos Position
-}
-
-func (n *ProductionNode) isLexical() bool {
- if len(n.RHS) == 1 && len(n.RHS[0].Elements) == 1 && n.RHS[0].Elements[0].Pattern != "" {
- return true
- }
- return false
-}
-
-type AlternativeNode struct {
- Elements []*ElementNode
- Directives []*DirectiveNode
- Pos Position
-}
-
-type ElementNode struct {
- ID string
- Pattern string
- Label *LabelNode
- Literally bool
- Pos Position
-}
-
-type LabelNode struct {
- Name string
- Pos Position
-}
-
-type DirectiveNode struct {
- Name string
- Parameters []*ParameterNode
- Pos Position
-}
-
-type ParameterNode struct {
- ID string
- Pattern string
- String string
- OrderedSymbol string
- Group []*DirectiveNode
- Expansion bool
- Pos Position
-}
-
-type FragmentNode struct {
- LHS string
- RHS string
- Pos Position
-}
-
-func raiseSyntaxError(row int, synErr *SyntaxError) {
- panic(&verr.SpecError{
- Cause: synErr,
- Row: row,
- })
-}
-
-func raiseSyntaxErrorWithDetail(row int, synErr *SyntaxError, detail string) {
- panic(&verr.SpecError{
- Cause: synErr,
- Detail: detail,
- Row: row,
- })
-}
-
-func Parse(src io.Reader) (*RootNode, error) {
- p, err := newParser(src)
- if err != nil {
- return nil, err
- }
-
- return p.parse()
-}
-
-type parser struct {
- lex *lexer
- peekedTok *token
- lastTok *token
- errs verr.SpecErrors
-
- // A token position that the parser read at last.
- // It is used as additional information in error messages.
- pos Position
-}
-
-func newParser(src io.Reader) (*parser, error) {
- lex, err := newLexer(src)
- if err != nil {
- return nil, err
- }
- return &parser{
- lex: lex,
- }, nil
-}
-
-func (p *parser) parse() (root *RootNode, retErr error) {
- root = p.parseRoot()
- if len(p.errs) > 0 {
- return nil, p.errs
- }
-
- return root, nil
-}
-
-func (p *parser) parseRoot() *RootNode {
- defer func() {
- err := recover()
- if err != nil {
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(fmt.Errorf("an unexpected error occurred: %v", err))
- }
- p.errs = append(p.errs, specErr)
- }
- }()
-
- var dirs []*DirectiveNode
- var prods []*ProductionNode
- var lexProds []*ProductionNode
- var fragments []*FragmentNode
- for {
- dir := p.parseTopLevelDirective()
- if dir != nil {
- dirs = append(dirs, dir)
- continue
- }
-
- fragment := p.parseFragment()
- if fragment != nil {
- fragments = append(fragments, fragment)
- continue
- }
-
- prod := p.parseProduction()
- if prod != nil {
- if prod.isLexical() {
- lexProds = append(lexProds, prod)
- } else {
- prods = append(prods, prod)
- }
- continue
- }
-
- if p.consume(tokenKindEOF) {
- break
- }
- }
-
- return &RootNode{
- Directives: dirs,
- Productions: prods,
- LexProductions: lexProds,
- Fragments: fragments,
- }
-}
-
-func (p *parser) parseTopLevelDirective() *DirectiveNode {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(err)
- }
-
- p.errs = append(p.errs, specErr)
- p.skipOverTo(tokenKindSemicolon)
- }()
-
- dir := p.parseDirective()
- if dir == nil {
- return nil
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon)
- }
-
- return dir
-}
-
-func (p *parser) parseFragment() *FragmentNode {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(err)
- }
-
- p.errs = append(p.errs, specErr)
- p.skipOverTo(tokenKindSemicolon)
- }()
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindKWFragment) {
- return nil
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoProductionName)
- }
- lhs := p.lastTok.text
- lhsPos := p.lastTok.pos
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindColon) {
- raiseSyntaxError(p.pos.Row, synErrNoColon)
- }
-
- var rhs string
- switch {
- case p.consume(tokenKindTerminalPattern):
- rhs = p.lastTok.text
- case p.consume(tokenKindStringLiteral):
- rhs = spec.EscapePattern(p.lastTok.text)
- default:
- raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern)
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
- }
-
- if !p.consume(tokenKindNewline) {
- if !p.consume(tokenKindEOF) {
- raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
- }
- }
-
- return &FragmentNode{
- LHS: lhs,
- RHS: rhs,
- Pos: lhsPos,
- }
-}
-
-func (p *parser) parseProduction() *ProductionNode {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(err)
- }
-
- p.errs = append(p.errs, specErr)
- p.skipOverTo(tokenKindSemicolon)
- }()
-
- p.consume(tokenKindNewline)
-
- if p.consume(tokenKindEOF) {
- return nil
- }
-
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoProductionName)
- }
- lhs := p.lastTok.text
- lhsPos := p.lastTok.pos
-
- var dirs []*DirectiveNode
- for {
- dir := p.parseDirective()
- if dir == nil {
- break
- }
- dirs = append(dirs, dir)
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindColon) {
- raiseSyntaxError(p.pos.Row, synErrNoColon)
- }
-
- alt := p.parseAlternative()
- rhs := []*AlternativeNode{alt}
- for {
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindOr) {
- break
- }
- alt := p.parseAlternative()
- rhs = append(rhs, alt)
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
- }
-
- if !p.consume(tokenKindNewline) {
- if !p.consume(tokenKindEOF) {
- raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
- }
- }
-
- prod := &ProductionNode{
- Directives: dirs,
- LHS: lhs,
- RHS: rhs,
- Pos: lhsPos,
- }
-
- // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs.
- // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate
- // name to the pattern. Therefore, this code prohibits alternatives from containing patterns.
- if !prod.isLexical() {
- for _, alt := range prod.RHS {
- for _, elem := range alt.Elements {
- if elem.Pattern != "" {
- raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt)
- }
- }
- }
- }
-
- return prod
-}
-
-func (p *parser) parseAlternative() *AlternativeNode {
- elems := []*ElementNode{}
- for {
- elem := p.parseElement()
- if elem == nil {
- break
- }
- elems = append(elems, elem)
- }
-
- // When a length of an alternative is zero, we cannot set a position.
- var firstElemPos Position
- if len(elems) > 0 {
- firstElemPos = elems[0].Pos
- }
-
- var dirs []*DirectiveNode
- for {
- dir := p.parseDirective()
- if dir == nil {
- break
- }
- dirs = append(dirs, dir)
- }
-
- return &AlternativeNode{
- Elements: elems,
- Directives: dirs,
- Pos: firstElemPos,
- }
-}
-
-func (p *parser) parseElement() *ElementNode {
- var elem *ElementNode
- switch {
- case p.consume(tokenKindID):
- elem = &ElementNode{
- ID: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindTerminalPattern):
- elem = &ElementNode{
- Pattern: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindStringLiteral):
- elem = &ElementNode{
- Pattern: p.lastTok.text,
- Literally: true,
- Pos: p.lastTok.pos,
- }
- default:
- if p.consume(tokenKindLabelMarker) {
- raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol)
- }
- return nil
- }
- if p.consume(tokenKindLabelMarker) {
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoLabel)
- }
- elem.Label = &LabelNode{
- Name: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- }
- return elem
-}
-
-func (p *parser) parseDirective() *DirectiveNode {
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindDirectiveMarker) {
- return nil
- }
- dirPos := p.lastTok.pos
-
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoDirectiveName)
- }
- name := p.lastTok.text
-
- var params []*ParameterNode
- for {
- param := p.parseParameter()
- if param == nil {
- break
- }
- params = append(params, param)
- }
-
- return &DirectiveNode{
- Name: name,
- Parameters: params,
- Pos: dirPos,
- }
-}
-
-func (p *parser) parseParameter() *ParameterNode {
- var param *ParameterNode
- switch {
- case p.consume(tokenKindID):
- param = &ParameterNode{
- ID: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindTerminalPattern):
- param = &ParameterNode{
- Pattern: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindStringLiteral):
- param = &ParameterNode{
- String: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindOrderedSymbolMarker):
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoOrderedSymbolName)
- }
- param = &ParameterNode{
- OrderedSymbol: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindLParen):
- pos := p.lastTok.pos
- var g []*DirectiveNode
- for {
- dir := p.parseDirective()
- if dir == nil {
- break
- }
- g = append(g, dir)
- }
- if !p.consume(tokenKindRParen) {
- raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup)
- }
- if len(g) == 0 {
- // Set an empty slice representing an empty directive group to distinguish between the following two cases.
- //
- // - #prec (); // vartan allows this case.
- // - #prec; // This case will raise an error.
- g = []*DirectiveNode{}
- }
- param = &ParameterNode{
- Group: g,
- Pos: pos,
- }
- }
- if p.consume(tokenKindExpantion) {
- switch {
- case param == nil:
- raiseSyntaxError(p.pos.Row, synErrStrayExpOp)
- case param.ID == "":
- raiseSyntaxError(p.pos.Row, synErrInvalidExpOperand)
- }
- param.Expansion = true
- }
- return param
-}
-
-func (p *parser) consume(expected tokenKind) bool {
- var tok *token
- var err error
- if p.peekedTok != nil {
- tok = p.peekedTok
- p.peekedTok = nil
- } else {
- tok, err = p.lex.next()
- if err != nil {
- panic(err)
- }
- }
- p.pos = tok.pos
- if tok.kind == tokenKindInvalid {
- raiseSyntaxErrorWithDetail(p.pos.Row, synErrInvalidToken, tok.text)
- }
- if tok.kind == expected {
- p.lastTok = tok
- return true
- }
- p.peekedTok = tok
-
- return false
-}
-
-func (p *parser) skip() {
- var tok *token
- var err error
- for {
- if p.peekedTok != nil {
- tok = p.peekedTok
- p.peekedTok = nil
- } else {
- tok, err = p.lex.next()
- if err != nil {
- p.errs = append(p.errs, &verr.SpecError{
- Cause: err,
- Row: p.pos.Row,
- })
- continue
- }
- }
-
- break
- }
-
- p.lastTok = tok
- p.pos = tok.pos
-}
-
-func (p *parser) skipOverTo(kind tokenKind) {
- for {
- if p.consume(kind) || p.consume(tokenKindEOF) {
- return
- }
- p.skip()
- }
-}
diff --git a/src/urubu/spec/grammar/parser/syntax_error.go b/src/urubu/spec/grammar/parser/syntax_error.go
deleted file mode 100644
index 719fb94..0000000
--- a/src/urubu/spec/grammar/parser/syntax_error.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package parser
-
-type SyntaxError struct {
- message string
-}
-
-func newSyntaxError(message string) *SyntaxError {
- return &SyntaxError{
- message: message,
- }
-}
-
-func (e *SyntaxError) Error() string {
- return e.message
-}
-
-var (
- // lexical errors
- synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore")
- synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier")
- synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively")
- synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the biginning of an identifier")
- synErrUnclosedTerminal = newSyntaxError("unclosed terminal")
- synErrUnclosedString = newSyntaxError("unclosed string")
- synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash")
- synErrEmptyPattern = newSyntaxError("a pattern must include at least one character")
- synErrEmptyString = newSyntaxError("a string must include at least one character")
-
- // syntax errors
- synErrInvalidToken = newSyntaxError("invalid token")
- synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;")
- synErrNoProductionName = newSyntaxError("a production name is missing")
- synErrNoColon = newSyntaxError("the colon must precede alternatives")
- synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative")
- synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol")
- synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @")
- synErrNoDirectiveName = newSyntaxError("a directive needs a name")
- synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing")
- synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )")
- synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal")
- synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier")
- synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier")
- synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline")
- synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element")
-)
diff --git a/src/urubu/spec/grammar/util.go b/src/urubu/spec/grammar/util.go
deleted file mode 100644
index bf3f233..0000000
--- a/src/urubu/spec/grammar/util.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package grammar
-
-import "strings"
-
-var rep = strings.NewReplacer(
- `.`, `\.`,
- `*`, `\*`,
- `+`, `\+`,
- `?`, `\?`,
- `|`, `\|`,
- `(`, `\(`,
- `)`, `\)`,
- `[`, `\[`,
- `\`, `\\`,
-)
-
-// EscapePattern escapes the special characters.
-// For example, EscapePattern(`+`) returns `\+`.
-func EscapePattern(s string) string {
- return rep.Replace(s)
-}
diff --git a/src/urubu/spec/test/tree_lexer.go b/src/urubu/spec/test.go
index 8bb1c87..4985e14 100644
--- a/src/urubu/spec/test/tree_lexer.go
+++ b/src/urubu/spec/test.go
@@ -1,11 +1,342 @@
-// Code generated by vartan-go. DO NOT EDIT.
+//go:generate vartan compile tree.vartan -o tree.json
+//go:generate vartan-go tree.json --package test
+
package test
import (
+ "bufio"
+ "bytes"
+ "encoding/json"
+ "errors"
"fmt"
"io"
+ "regexp"
+ "strconv"
+ "strings"
+ "unicode/utf8"
)
+type TreeDiff struct {
+ ExpectedPath string
+ ActualPath string
+ Message string
+}
+
+func newTreeDiff(expected, actual *Tree, message string) *TreeDiff {
+ return &TreeDiff{
+ ExpectedPath: expected.path(),
+ ActualPath: actual.path(),
+ Message: message,
+ }
+}
+
+type Tree struct {
+ Parent *Tree
+ Offset int
+ Kind string
+ Children []*Tree
+ Lexeme string
+}
+
+func NewNonTerminalTree(kind string, children ...*Tree) *Tree {
+ return &Tree{
+ Kind: kind,
+ Children: children,
+ }
+}
+
+func NewTerminalNode(kind string, lexeme string) *Tree {
+ return &Tree{
+ Kind: kind,
+ Lexeme: lexeme,
+ }
+}
+
+func (t *Tree) Fill() *Tree {
+ for i, c := range t.Children {
+ c.Parent = t
+ c.Offset = i
+ c.Fill()
+ }
+ return t
+}
+
+func (t *Tree) path() string {
+ if t.Parent == nil {
+ return t.Kind
+ }
+ return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind)
+}
+
+func (t *Tree) Format() []byte {
+ var b bytes.Buffer
+ t.format(&b, 0)
+ return b.Bytes()
+}
+
+func (t *Tree) format(buf *bytes.Buffer, depth int) {
+ for i := 0; i < depth; i++ {
+ buf.WriteString(" ")
+ }
+ buf.WriteString("(")
+ buf.WriteString(t.Kind)
+ if len(t.Children) > 0 {
+ buf.WriteString("\n")
+ for i, c := range t.Children {
+ c.format(buf, depth+1)
+ if i < len(t.Children)-1 {
+ buf.WriteString("\n")
+ }
+ }
+ }
+ buf.WriteString(")")
+}
+
+func DiffTree(expected, actual *Tree) []*TreeDiff {
+ if expected == nil && actual == nil {
+ return nil
+ }
+ if actual.Kind != expected.Kind {
+ msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind)
+ return []*TreeDiff{
+ newTreeDiff(expected, actual, msg),
+ }
+ }
+ if expected.Lexeme != actual.Lexeme {
+ msg := fmt.Sprintf("unexpected lexeme: expected '%v' but got '%v'", expected.Lexeme, actual.Lexeme)
+ return []*TreeDiff{
+ newTreeDiff(expected, actual, msg),
+ }
+ }
+ if len(actual.Children) != len(expected.Children) {
+ msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children))
+ return []*TreeDiff{
+ newTreeDiff(expected, actual, msg),
+ }
+ }
+ var diffs []*TreeDiff
+ for i, exp := range expected.Children {
+ if ds := DiffTree(exp, actual.Children[i]); len(ds) > 0 {
+ diffs = append(diffs, ds...)
+ }
+ }
+ return diffs
+}
+
+type TestCase struct {
+ Description string
+ Source []byte
+ Output *Tree
+}
+
+func ParseTestCase(r io.Reader) (*TestCase, error) {
+ parts, err := splitIntoParts(r)
+ if err != nil {
+ return nil, err
+ }
+ if len(parts) != 3 {
+ return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(parts))
+ }
+
+ tp := &treeParser{
+ lineOffset: parts[0].lineCount + parts[1].lineCount + 2,
+ }
+ tree, err := tp.parseTree(bytes.NewReader(parts[2].buf))
+ if err != nil {
+ return nil, err
+ }
+
+ return &TestCase{
+ Description: string(parts[0].buf),
+ Source: parts[1].buf,
+ Output: tree,
+ }, nil
+}
+
+type testCasePart struct {
+ buf []byte
+ lineCount int
+}
+
+func splitIntoParts(r io.Reader) ([]*testCasePart, error) {
+ var bufs []*testCasePart
+ s := bufio.NewScanner(r)
+ for {
+ buf, lineCount, err := readPart(s)
+ if err != nil {
+ return nil, err
+ }
+ if buf == nil {
+ break
+ }
+ bufs = append(bufs, &testCasePart{
+ buf: buf,
+ lineCount: lineCount,
+ })
+ }
+ if err := s.Err(); err != nil {
+ return nil, err
+ }
+ return bufs, nil
+}
+
+var reDelim = regexp.MustCompile(`^\s*---+\s*$`)
+
+func readPart(s *bufio.Scanner) ([]byte, int, error) {
+ if !s.Scan() {
+ return nil, 0, s.Err()
+ }
+ buf := &bytes.Buffer{}
+ line := s.Bytes()
+ if reDelim.Match(line) {
+ // Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data.
+ return []byte{}, 0, nil
+ }
+ _, err := buf.Write(line)
+ if err != nil {
+ return nil, 0, err
+ }
+ lineCount := 1
+ for s.Scan() {
+ line := s.Bytes()
+ if reDelim.Match(line) {
+ return buf.Bytes(), lineCount, nil
+ }
+ _, err := buf.Write([]byte("\n"))
+ if err != nil {
+ return nil, 0, err
+ }
+ _, err = buf.Write(line)
+ if err != nil {
+ return nil, 0, err
+ }
+ lineCount++
+ }
+ if err := s.Err(); err != nil {
+ return nil, 0, err
+ }
+ return buf.Bytes(), lineCount, nil
+}
+
+type treeParser struct {
+ lineOffset int
+}
+
+func (tp *treeParser) parseTree(src io.Reader) (*Tree, error) {
+ toks, err := NewTokenStream(src)
+ if err != nil {
+ return nil, err
+ }
+ gram := NewGrammar()
+ tb := NewDefaultSyntaxTreeBuilder()
+ p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb)))
+ if err != nil {
+ return nil, err
+ }
+ err = p.Parse()
+ if err != nil {
+ return nil, err
+ }
+ synErrs := p.SyntaxErrors()
+ if len(synErrs) > 0 {
+ var b strings.Builder
+ b.Write(formatSyntaxError(synErrs[0], gram, tp.lineOffset))
+ for _, synErr := range synErrs[1:] {
+ b.WriteRune('\n')
+ b.Write(formatSyntaxError(synErr, gram, tp.lineOffset))
+ }
+ return nil, errors.New(b.String())
+ }
+ t, err := tp.genTree(tb.Tree())
+ if err != nil {
+ return nil, err
+ }
+ return t.Fill(), nil
+}
+
+func formatSyntaxError(synErr *SyntaxError, gram Grammar, lineOffset int) []byte {
+ var b bytes.Buffer
+
+ b.WriteString(fmt.Sprintf("%v:%v: %v: ", lineOffset+synErr.Row+1, synErr.Col+1, synErr.Message))
+
+ tok := synErr.Token
+ switch {
+ case tok.EOF():
+ b.WriteString("<eof>")
+ case tok.Invalid():
+ b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())))
+ default:
+ if term := gram.Terminal(tok.TerminalID()); term != "" {
+ b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term))
+ } else {
+ b.WriteString(fmt.Sprintf("'%v'", string(tok.Lexeme())))
+ }
+ }
+ b.WriteString(fmt.Sprintf(": expected: %v", synErr.ExpectedTerminals[0]))
+ for _, t := range synErr.ExpectedTerminals[1:] {
+ b.WriteString(fmt.Sprintf(", %v", t))
+ }
+
+ return b.Bytes()
+}
+
+func (tp *treeParser) genTree(node *Node) (*Tree, error) {
+ // A node labeled 'error' cannot have children. It always must be (error).
+ if sym := node.Children[0]; sym.Text == "error" {
+ if len(node.Children) > 1 {
+ return nil, fmt.Errorf("%v:%v: error node cannot take children", tp.lineOffset+sym.Row+1, sym.Col+1)
+ }
+ return NewTerminalNode(sym.Text, ""), nil
+ }
+
+ if len(node.Children) == 2 && node.Children[1].KindName == "string" {
+ var text string
+ str := node.Children[1].Children[0]
+ switch str.KindName {
+ case "raw_string":
+ text = str.Children[0].Text
+ case "interpreted_string":
+ var b strings.Builder
+ for _, c := range str.Children {
+ switch c.KindName {
+ case "escaped_seq":
+ b.WriteString(strings.TrimPrefix(`\`, c.Text))
+ case "escape_char":
+ return nil, fmt.Errorf("%v:%v: incomplete escape sequence", tp.lineOffset+c.Row+1, c.Col+1)
+ case "codepoint_expr":
+ cp := c.Children[0]
+ n, err := strconv.ParseInt(cp.Text, 16, 64)
+ if err != nil {
+ return nil, fmt.Errorf("%v:%v: %v", tp.lineOffset+cp.Row+1, cp.Col+1, err)
+ }
+ if !utf8.ValidRune(rune(n)) {
+ return nil, fmt.Errorf("%v:%v: invalid code point: %v", tp.lineOffset+cp.Row+1, cp.Col+1, cp.Text)
+ }
+ b.WriteRune(rune(n))
+ default:
+ b.WriteString(c.Text)
+ }
+ }
+ text = b.String()
+ }
+ return NewTerminalNode(node.Children[0].Text, text), nil
+ }
+
+ var children []*Tree
+ if len(node.Children) > 1 {
+ children = make([]*Tree, len(node.Children)-1)
+ for i, c := range node.Children[1:] {
+ var err error
+ children[i], err = tp.genTree(c)
+ if err != nil {
+ return nil, err
+ }
+ }
+ }
+ return NewNonTerminalTree(node.Children[0].Text, children...), nil
+}
+// Code generated by vartan-go. DO NOT EDIT.
+
type ModeID int
func (id ModeID) Int() int {
@@ -1022,3 +1353,1072 @@ func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, strin
id := s.kindIDs[mode][modeKind]
return id, s.kindNames[id]
}
+// Code generated by vartan-go. DO NOT EDIT.
+
+type Grammar interface {
+ // InitialState returns the initial state of a parser.
+ InitialState() int
+
+ // StartProduction returns the start production of grammar.
+ StartProduction() int
+
+ // Action returns an ACTION entry corresponding to a (state, terminal symbol) pair.
+ Action(state int, terminal int) int
+
+ // GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair.
+ GoTo(state int, lhs int) int
+
+ // ErrorTrapperState returns true when a state can shift the error symbol.
+ ErrorTrapperState(state int) bool
+
+ // LHS returns a LHS symbol of a production.
+ LHS(prod int) int
+
+ // AlternativeSymbolCount returns a symbol count of p production.
+ AlternativeSymbolCount(prod int) int
+
+ // RecoverProduction returns true when a production has the recover directive.
+ RecoverProduction(prod int) bool
+
+ // NonTerminal retuns a string representaion of a non-terminal symbol.
+ NonTerminal(nonTerminal int) string
+
+ // TerminalCount returns a terminal symbol count of grammar.
+ TerminalCount() int
+
+ // SkipTerminal returns true when a terminal symbol must be skipped on syntax analysis.
+ SkipTerminal(terminal int) bool
+
+ // EOF returns the EOF symbol.
+ EOF() int
+
+ // Error returns the error symbol.
+ Error() int
+
+ // Terminal retuns a string representaion of a terminal symbol.
+ Terminal(terminal int) string
+
+ // ASTAction returns an AST action entries.
+ ASTAction(prod int) []int
+}
+
+type VToken interface {
+ // TerminalID returns a terminal ID.
+ TerminalID() int
+
+ // Lexeme returns a lexeme.
+ Lexeme() []byte
+
+ // EOF returns true when a token represents EOF.
+ EOF() bool
+
+ // Invalid returns true when a token is invalid.
+ Invalid() bool
+
+ // Position returns (row, column) pair.
+ Position() (int, int)
+}
+
+type TokenStream interface {
+ Next() (VToken, error)
+}
+
+type SyntaxError struct {
+ Row int
+ Col int
+ Message string
+ Token VToken
+ ExpectedTerminals []string
+}
+
+type ParserOption func(p *Parser) error
+
+// DisableLAC disables LAC (lookahead correction). LAC is enabled by default.
+func DisableLAC() ParserOption {
+ return func(p *Parser) error {
+ p.disableLAC = true
+ return nil
+ }
+}
+
+func SemanticAction(semAct SemanticActionSet) ParserOption {
+ return func(p *Parser) error {
+ p.semAct = semAct
+ return nil
+ }
+}
+
+type Parser struct {
+ toks TokenStream
+ gram Grammar
+ stateStack *stateStack
+ semAct SemanticActionSet
+ disableLAC bool
+ onError bool
+ shiftCount int
+ synErrs []*SyntaxError
+}
+
+func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) {
+ p := &Parser{
+ toks: toks,
+ gram: gram,
+ stateStack: &stateStack{},
+ }
+
+ for _, opt := range opts {
+ err := opt(p)
+ if err != nil {
+ return nil, err
+ }
+ }
+
+ return p, nil
+}
+
+func (p *Parser) Parse() error {
+ p.stateStack.push(p.gram.InitialState())
+ tok, err := p.nextToken()
+ if err != nil {
+ return err
+ }
+
+ACTION_LOOP:
+ for {
+ act := p.lookupAction(tok)
+
+ switch {
+ case act < 0: // Shift
+ nextState := act * -1
+
+ recovered := false
+ if p.onError {
+ p.shiftCount++
+
+ // When the parser performs shift three times, the parser recovers from the error state.
+ if p.shiftCount >= 3 {
+ p.onError = false
+ p.shiftCount = 0
+ recovered = true
+ }
+ }
+
+ p.shift(nextState)
+
+ if p.semAct != nil {
+ p.semAct.Shift(tok, recovered)
+ }
+
+ tok, err = p.nextToken()
+ if err != nil {
+ return err
+ }
+ case act > 0: // Reduce
+ prodNum := act
+
+ recovered := false
+ if p.onError && p.gram.RecoverProduction(prodNum) {
+ p.onError = false
+ p.shiftCount = 0
+ recovered = true
+ }
+
+ accepted := p.reduce(prodNum)
+ if accepted {
+ if p.semAct != nil {
+ p.semAct.Accept()
+ }
+
+ return nil
+ }
+
+ if p.semAct != nil {
+ p.semAct.Reduce(prodNum, recovered)
+ }
+ default: // Error
+ if p.onError {
+ tok, err = p.nextToken()
+ if err != nil {
+ return err
+ }
+ if tok.EOF() {
+ if p.semAct != nil {
+ p.semAct.MissError(tok)
+ }
+
+ return nil
+ }
+
+ continue ACTION_LOOP
+ }
+
+ row, col := tok.Position()
+ p.synErrs = append(p.synErrs, &SyntaxError{
+ Row: row,
+ Col: col,
+ Message: "unexpected token",
+ Token: tok,
+ ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
+ })
+
+ count, ok := p.trapError()
+ if !ok {
+ if p.semAct != nil {
+ p.semAct.MissError(tok)
+ }
+
+ return nil
+ }
+
+ p.onError = true
+ p.shiftCount = 0
+
+ act, err := p.lookupActionOnError()
+ if err != nil {
+ return err
+ }
+
+ p.shift(act * -1)
+
+ if p.semAct != nil {
+ p.semAct.TrapAndShiftError(tok, count)
+ }
+ }
+ }
+}
+
+// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid,
+// this method returns `true`.
+func (p *Parser) validateLookahead(term int) bool {
+ p.stateStack.enableExploratoryMode()
+ defer p.stateStack.disableExploratoryMode()
+
+ for {
+ act := p.gram.Action(p.stateStack.topExploratorily(), term)
+
+ switch {
+ case act < 0: // Shift
+ return true
+ case act > 0: // Reduce
+ prodNum := act
+
+ lhs := p.gram.LHS(prodNum)
+ if lhs == p.gram.LHS(p.gram.StartProduction()) {
+ return true
+ }
+ n := p.gram.AlternativeSymbolCount(prodNum)
+ p.stateStack.popExploratorily(n)
+ state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs)
+ p.stateStack.pushExploratorily(state)
+ default: // Error
+ return false
+ }
+ }
+}
+
+func (p *Parser) nextToken() (VToken, error) {
+ for {
+ // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
+ // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
+ // a syntax error because the parser cannot find an entry corresponding to the invalid token.
+ tok, err := p.toks.Next()
+ if err != nil {
+ return nil, err
+ }
+
+ if p.gram.SkipTerminal(tok.TerminalID()) {
+ continue
+ }
+
+ return tok, nil
+ }
+}
+
+func (p *Parser) tokenToTerminal(tok VToken) int {
+ if tok.EOF() {
+ return p.gram.EOF()
+ }
+
+ return tok.TerminalID()
+}
+
+func (p *Parser) lookupAction(tok VToken) int {
+ if !p.disableLAC {
+ term := p.tokenToTerminal(tok)
+ if !p.validateLookahead(term) {
+ return 0
+ }
+ }
+
+ return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok))
+}
+
+func (p *Parser) lookupActionOnError() (int, error) {
+ act := p.gram.Action(p.stateStack.top(), p.gram.Error())
+ if act >= 0 {
+ return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error()))
+ }
+
+ return act, nil
+}
+
+func (p *Parser) shift(nextState int) {
+ p.stateStack.push(nextState)
+}
+
+func (p *Parser) reduce(prodNum int) bool {
+ lhs := p.gram.LHS(prodNum)
+ if lhs == p.gram.LHS(p.gram.StartProduction()) {
+ return true
+ }
+ n := p.gram.AlternativeSymbolCount(prodNum)
+ p.stateStack.pop(n)
+ nextState := p.gram.GoTo(p.stateStack.top(), lhs)
+ p.stateStack.push(nextState)
+ return false
+}
+
+func (p *Parser) trapError() (int, bool) {
+ count := 0
+ for {
+ if p.gram.ErrorTrapperState(p.stateStack.top()) {
+ return count, true
+ }
+
+ if p.stateStack.top() != p.gram.InitialState() {
+ p.stateStack.pop(1)
+ count++
+ } else {
+ return 0, false
+ }
+ }
+}
+
+func (p *Parser) SyntaxErrors() []*SyntaxError {
+ return p.synErrs
+}
+
+func (p *Parser) searchLookahead(state int) []string {
+ kinds := []string{}
+ termCount := p.gram.TerminalCount()
+ for term := 0; term < termCount; term++ {
+ if p.disableLAC {
+ if p.gram.Action(p.stateStack.top(), term) == 0 {
+ continue
+ }
+ } else {
+ if !p.validateLookahead(term) {
+ continue
+ }
+ }
+
+ // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol
+ // intentionally.
+ if term == p.gram.Error() {
+ continue
+ }
+
+ kinds = append(kinds, p.gram.Terminal(term))
+ }
+
+ return kinds
+}
+
+type stateStack struct {
+ items []int
+ itemsExp []int
+}
+
+func (s *stateStack) enableExploratoryMode() {
+ s.itemsExp = make([]int, len(s.items))
+ copy(s.itemsExp, s.items)
+}
+
+func (s *stateStack) disableExploratoryMode() {
+ s.itemsExp = nil
+}
+
+func (s *stateStack) top() int {
+ return s.items[len(s.items)-1]
+}
+
+func (s *stateStack) topExploratorily() int {
+ return s.itemsExp[len(s.itemsExp)-1]
+}
+
+func (s *stateStack) push(state int) {
+ s.items = append(s.items, state)
+}
+
+func (s *stateStack) pushExploratorily(state int) {
+ s.itemsExp = append(s.itemsExp, state)
+}
+
+func (s *stateStack) pop(n int) {
+ s.items = s.items[:len(s.items)-n]
+}
+
+func (s *stateStack) popExploratorily(n int) {
+ s.itemsExp = s.itemsExp[:len(s.itemsExp)-n]
+}
+
+type grammarImpl struct {
+ recoverProductions []int
+ action []int
+ goTo []int
+ alternativeSymbolCounts []int
+ errorTrapperStates []int
+ nonTerminals []string
+ lhsSymbols []int
+ terminals []string
+ terminalSkip []int
+ astActions [][]int
+}
+
+func NewGrammar() *grammarImpl {
+ return &grammarImpl{
+ recoverProductions: []int{
+ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ },
+ action: []int{
+ 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -3, 0, 0, 0, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, -5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
+ -2, 7, 0, -11, 0, 0, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -14, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 12, 12, 0, 0, -17, 12, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -22, 0, 16, 16, 0, 0, 0, 0, 0, -23,
+ -24, -25, -26, -27, -28, -29, 16, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -30, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ -31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -23, -24, -25, -26, -27, -28, -29, 15,
+ 0, 18, 0, 0, 18, 18, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 0,
+ 25, 0, 0, 25, 25, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -33, 0, 19, 0,
+ 0, 19, 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, -34, 0, 0, 0, 0, 0, 0, 20, 0, 0, 20,
+ 20, 0, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, 20, 0, 21, 0, 0, 21, 21,
+ 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 0, 22, 0, 0, 22, 22, 0,
+ 0, 0, 0, 0, 22, 22, 22, 22, 22, 22, 22, 22, 0, 23, 0, 0, 23, 23, 0, 0,
+ 0, 0, 0, 23, 23, 23, 23, 23, 23, 23, 23, 0, 24, 0, 0, 24, 24, 0, 0, 0,
+ 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 0, 10, 0, 0, 10, 10, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 13, 13, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 17, 17, 0, 0, 0, 0, 0, 17,
+ 17, 17, 17, 17, 17, 17, 17, 0, 14, 0, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, -35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -36,
+ 0, 0, 0, 0, 0, 26, 0, 0, 26, 26, 0, 0, 0, 0, 0, 26, 26, 26, 26, 26,
+ 26, 26, 26,
+ },
+ goTo: []int{
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 0, 10, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 21, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 21,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ },
+ alternativeSymbolCounts: []int{
+ 0, 1, 4, 4, 3, 2, 1, 0, 1, 1, 3, 1, 0, 3, 3, 1, 0, 2, 1, 1,
+ 1, 1, 1, 1, 1, 1, 4,
+ },
+ errorTrapperStates: []int{
+ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ nonTerminals: []string{
+ "",
+ "tree'",
+ "tree",
+ "tree_list",
+ "string",
+ "raw_string",
+ "opt_raw_string_body",
+ "interpreted_string",
+ "opt_interpreted_string_body",
+ "interpreted_string_body",
+ "interpreted_string_elem",
+ "codepoint_expr",
+ },
+ lhsSymbols: []int{
+ 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
+ 10, 10, 10, 10, 10, 10, 11,
+ },
+ terminals: []string{
+ "",
+ "<eof>",
+ "error",
+ "ws",
+ "l_paren",
+ "r_paren",
+ "identifier",
+ "raw_string_open",
+ "raw_string_body",
+ "raw_string_close",
+ "interpreted_string_open",
+ "interpreted_seq",
+ "codepoint_prefix",
+ "l_brace",
+ "r_brace",
+ "hex_digits",
+ "escaped_seq",
+ "escape_char",
+ "interpreted_string_close",
+ },
+ terminalSkip: []int{
+ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ },
+ astActions: [][]int{
+ nil,
+ nil,
+ {
+ 2, -3,
+ },
+ {
+ 2, 3,
+ },
+ {
+ 2,
+ },
+ {
+ -1, 2,
+ },
+ nil,
+ nil,
+ nil,
+ nil,
+ {
+ -2,
+ },
+ nil,
+ nil,
+ {
+ -2,
+ },
+ {
+ 2,
+ },
+ {
+ -1,
+ },
+ nil,
+ {
+ -1, -2,
+ },
+ {
+ -1,
+ },
+ nil,
+ nil,
+ nil,
+ nil,
+ nil,
+ nil,
+ nil,
+ {
+ 3,
+ },
+ },
+ }
+}
+
+func (g *grammarImpl) InitialState() int {
+ return 0
+}
+
+func (g *grammarImpl) StartProduction() int {
+ return 1
+}
+
+func (g *grammarImpl) RecoverProduction(prod int) bool {
+ return g.recoverProductions[prod] != 0
+}
+
+func (g *grammarImpl) Action(state int, terminal int) int {
+ return g.action[state*19+terminal]
+}
+
+func (g *grammarImpl) GoTo(state int, lhs int) int {
+ return g.goTo[state*12+lhs]
+}
+
+func (g *grammarImpl) AlternativeSymbolCount(prod int) int {
+ return g.alternativeSymbolCounts[prod]
+}
+
+func (g *grammarImpl) TerminalCount() int {
+ return 19
+}
+
+func (g *grammarImpl) SkipTerminal(terminal int) bool {
+ return g.terminalSkip[terminal] == 1
+}
+
+func (g *grammarImpl) ErrorTrapperState(state int) bool {
+ return g.errorTrapperStates[state] != 0
+}
+
+func (g *grammarImpl) NonTerminal(nonTerminal int) string {
+ return g.nonTerminals[nonTerminal]
+}
+
+func (g *grammarImpl) LHS(prod int) int {
+ return g.lhsSymbols[prod]
+}
+
+func (g *grammarImpl) EOF() int {
+ return 1
+}
+
+func (g *grammarImpl) Error() int {
+ return 2
+}
+
+func (g *grammarImpl) Terminal(terminal int) string {
+ return g.terminals[terminal]
+}
+
+func (g *grammarImpl) ASTAction(prod int) []int {
+ return g.astActions[prod]
+}
+
+type vToken struct {
+ terminalID int
+ tok *Token
+}
+
+func (t *vToken) TerminalID() int {
+ return t.terminalID
+}
+
+func (t *vToken) Lexeme() []byte {
+ return t.tok.Lexeme
+}
+
+func (t *vToken) EOF() bool {
+ return t.tok.EOF
+}
+
+func (t *vToken) Invalid() bool {
+ return t.tok.Invalid
+}
+
+func (t *vToken) Position() (int, int) {
+ return t.tok.Row, t.tok.Col
+}
+
+var kindToTerminal = []int{
+ 0, 3, 4, 5, 6, 7, 10, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18,
+}
+
+type tokenStream struct {
+ lex *Lexer
+ kindToTerminal []int
+}
+
+func NewTokenStream(src io.Reader) (*tokenStream, error) {
+ lex, err := NewLexer(NewLexSpec(), src)
+ if err != nil {
+ return nil, err
+ }
+
+ return &tokenStream{
+ lex: lex,
+ }, nil
+}
+
+func (t *tokenStream) Next() (VToken, error) {
+ tok, err := t.lex.Next()
+ if err != nil {
+ return nil, err
+ }
+ return &vToken{
+ terminalID: kindToTerminal[tok.KindID],
+ tok: tok,
+ }, nil
+}
+// Code generated by vartan-go. DO NOT EDIT.
+
+// SemanticActionSet is a set of semantic actions a parser calls.
+type SemanticActionSet interface {
+ // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol.
+ // When the parser recovered from an error state by shifting the token, `recovered` is true.
+ Shift(tok VToken, recovered bool)
+
+ // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production.
+ // When the parser recovered from an error state by reducing the production, `recovered` is true.
+ Reduce(prodNum int, recovered bool)
+
+ // Accept runs when the parser accepts an input.
+ Accept()
+
+ // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack.
+ // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards
+ // from the state stack.
+ // Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
+ // corresponding to the error symbol doesn't exist.
+ TrapAndShiftError(cause VToken, popped int)
+
+ // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error.
+ MissError(cause VToken)
+}
+
+var _ SemanticActionSet = &SyntaxTreeActionSet{}
+
+// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface.
+type SyntaxTreeNode interface {
+ // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast`
+ // directive with `...` operator.
+ ChildCount() int
+
+ // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast`
+ // directive with `...` operator.
+ ExpandChildren() []SyntaxTreeNode
+}
+
+var _ SyntaxTreeNode = &Node{}
+
+// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types.
+// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface.
+type SyntaxTreeBuilder interface {
+ Shift(kindName string, text string, row, col int) SyntaxTreeNode
+ ShiftError(kindName string) SyntaxTreeNode
+ Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode
+ Accept(f SyntaxTreeNode)
+}
+
+var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{}
+
+// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder.
+type DefaulSyntaxTreeBuilder struct {
+ tree *Node
+}
+
+// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder.
+func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder {
+ return &DefaulSyntaxTreeBuilder{}
+}
+
+// Shift is a implementation of SyntaxTreeBuilder.Shift.
+func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode {
+ return &Node{
+ Type: NodeTypeTerminal,
+ KindName: kindName,
+ Text: text,
+ Row: row,
+ Col: col,
+ }
+}
+
+// ShiftError is a implementation of SyntaxTreeBuilder.ShiftError.
+func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode {
+ return &Node{
+ Type: NodeTypeError,
+ KindName: kindName,
+ }
+}
+
+// Reduce is a implementation of SyntaxTreeBuilder.Reduce.
+func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode {
+ cNodes := make([]*Node, len(children))
+ for i, c := range children {
+ cNodes[i] = c.(*Node)
+ }
+ return &Node{
+ Type: NodeTypeNonTerminal,
+ KindName: kindName,
+ Children: cNodes,
+ }
+}
+
+// Accept is a implementation of SyntaxTreeBuilder.Accept.
+func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) {
+ b.tree = f.(*Node)
+}
+
+// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil.
+func (b *DefaulSyntaxTreeBuilder) Tree() *Node {
+ return b.tree
+}
+
+// SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree.
+type SyntaxTreeActionSet struct {
+ gram Grammar
+ builder SyntaxTreeBuilder
+ semStack *semanticStack
+ disableASTAction bool
+}
+
+// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree).
+// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them.
+func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
+ return &SyntaxTreeActionSet{
+ gram: gram,
+ builder: builder,
+ semStack: newSemanticStack(),
+ }
+}
+
+// NewCSTTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree).
+// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them.
+func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
+ return &SyntaxTreeActionSet{
+ gram: gram,
+ builder: builder,
+ semStack: newSemanticStack(),
+ disableASTAction: true,
+ }
+}
+
+// Shift is a implementation of SemanticActionSet.Shift method.
+func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
+ term := a.tokenToTerminal(tok)
+ row, col := tok.Position()
+ a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col))
+}
+
+// Reduce is a implementation of SemanticActionSet.Reduce method.
+func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
+ lhs := a.gram.LHS(prodNum)
+
+ // When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
+ n := a.gram.AlternativeSymbolCount(prodNum)
+ handle := a.semStack.pop(n)
+
+ var astAct []int
+ if !a.disableASTAction {
+ astAct = a.gram.ASTAction(prodNum)
+ }
+ var children []SyntaxTreeNode
+ if astAct != nil {
+ // Count the number of children in advance to avoid frequent growth in a slice for children.
+ {
+ l := 0
+ for _, e := range astAct {
+ if e > 0 {
+ l++
+ } else {
+ offset := e*-1 - 1
+ l += handle[offset].ChildCount()
+ }
+ }
+
+ children = make([]SyntaxTreeNode, l)
+ }
+
+ p := 0
+ for _, e := range astAct {
+ if e > 0 {
+ offset := e - 1
+ children[p] = handle[offset]
+ p++
+ } else {
+ offset := e*-1 - 1
+ for _, c := range handle[offset].ExpandChildren() {
+ children[p] = c
+ p++
+ }
+ }
+ }
+ } else {
+ // If an alternative has no AST action, a driver generates
+ // a node with the same structure as a CST.
+ children = handle
+ }
+
+ a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children))
+}
+
+// Accept is a implementation of SemanticActionSet.Accept method.
+func (a *SyntaxTreeActionSet) Accept() {
+ top := a.semStack.pop(1)
+ a.builder.Accept(top[0])
+}
+
+// TrapAndShiftError is a implementation of SemanticActionSet.TrapAndShiftError method.
+func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) {
+ a.semStack.pop(popped)
+ a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error())))
+}
+
+// MissError is a implementation of SemanticActionSet.MissError method.
+func (a *SyntaxTreeActionSet) MissError(cause VToken) {
+}
+
+func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
+ if tok.EOF() {
+ return a.gram.EOF()
+ }
+
+ return tok.TerminalID()
+}
+
+type semanticStack struct {
+ frames []SyntaxTreeNode
+}
+
+func newSemanticStack() *semanticStack {
+ return &semanticStack{
+ frames: make([]SyntaxTreeNode, 0, 100),
+ }
+}
+
+func (s *semanticStack) push(f SyntaxTreeNode) {
+ s.frames = append(s.frames, f)
+}
+
+func (s *semanticStack) pop(n int) []SyntaxTreeNode {
+ fs := s.frames[len(s.frames)-n:]
+ s.frames = s.frames[:len(s.frames)-n]
+
+ return fs
+}
+
+type NodeType int
+
+const (
+ NodeTypeError = 0
+ NodeTypeTerminal = 1
+ NodeTypeNonTerminal = 2
+)
+
+// Node is a implementation of SyntaxTreeNode interface.
+type Node struct {
+ Type NodeType
+ KindName string
+ Text string
+ Row int
+ Col int
+ Children []*Node
+}
+
+func (n *Node) MarshalJSON() ([]byte, error) {
+ switch n.Type {
+ case NodeTypeError:
+ return json.Marshal(struct {
+ Type NodeType `json:"type"`
+ KindName string `json:"kind_name"`
+ }{
+ Type: n.Type,
+ KindName: n.KindName,
+ })
+ case NodeTypeTerminal:
+ if n.KindName == "" {
+ return json.Marshal(struct {
+ Type NodeType `json:"type"`
+ Text string `json:"text"`
+ Row int `json:"row"`
+ Col int `json:"col"`
+ }{
+ Type: n.Type,
+ Text: n.Text,
+ Row: n.Row,
+ Col: n.Col,
+ })
+ }
+ return json.Marshal(struct {
+ Type NodeType `json:"type"`
+ KindName string `json:"kind_name"`
+ Text string `json:"text"`
+ Row int `json:"row"`
+ Col int `json:"col"`
+ }{
+ Type: n.Type,
+ KindName: n.KindName,
+ Text: n.Text,
+ Row: n.Row,
+ Col: n.Col,
+ })
+ case NodeTypeNonTerminal:
+ return json.Marshal(struct {
+ Type NodeType `json:"type"`
+ KindName string `json:"kind_name"`
+ Children []*Node `json:"children"`
+ }{
+ Type: n.Type,
+ KindName: n.KindName,
+ Children: n.Children,
+ })
+ default:
+ return nil, fmt.Errorf("invalid node type: %v", n.Type)
+ }
+}
+
+// ChildCount is a implementation of SyntaxTreeNode.ChildCount.
+func (n *Node) ChildCount() int {
+ return len(n.Children)
+}
+
+// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren.
+func (n *Node) ExpandChildren() []SyntaxTreeNode {
+ fs := make([]SyntaxTreeNode, len(n.Children))
+ for i, n := range n.Children {
+ fs[i] = n
+ }
+ return fs
+}
+
+// PrintTree prints a syntax tree whose root is `node`.
+func PrintTree(w io.Writer, node *Node) {
+ printTree(w, node, "", "")
+}
+
+func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) {
+ if node == nil {
+ return
+ }
+
+ switch node.Type {
+ case NodeTypeError:
+ fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
+ case NodeTypeTerminal:
+ fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text))
+ case NodeTypeNonTerminal:
+ fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
+
+ num := len(node.Children)
+ for i, child := range node.Children {
+ var line string
+ if num > 1 && i < num-1 {
+ line = "├─ "
+ } else {
+ line = "└─ "
+ }
+
+ var prefix string
+ if i >= num-1 {
+ prefix = " "
+ } else {
+ prefix = "│ "
+ }
+
+ printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
+ }
+ }
+}
diff --git a/src/urubu/spec/test/parser.go b/src/urubu/spec/test/parser.go
deleted file mode 100644
index b7265d7..0000000
--- a/src/urubu/spec/test/parser.go
+++ /dev/null
@@ -1,336 +0,0 @@
-//go:generate vartan compile tree.vartan -o tree.json
-//go:generate vartan-go tree.json --package test
-
-package test
-
-import (
- "bufio"
- "bytes"
- "errors"
- "fmt"
- "io"
- "regexp"
- "strconv"
- "strings"
- "unicode/utf8"
-)
-
-type TreeDiff struct {
- ExpectedPath string
- ActualPath string
- Message string
-}
-
-func newTreeDiff(expected, actual *Tree, message string) *TreeDiff {
- return &TreeDiff{
- ExpectedPath: expected.path(),
- ActualPath: actual.path(),
- Message: message,
- }
-}
-
-type Tree struct {
- Parent *Tree
- Offset int
- Kind string
- Children []*Tree
- Lexeme string
-}
-
-func NewNonTerminalTree(kind string, children ...*Tree) *Tree {
- return &Tree{
- Kind: kind,
- Children: children,
- }
-}
-
-func NewTerminalNode(kind string, lexeme string) *Tree {
- return &Tree{
- Kind: kind,
- Lexeme: lexeme,
- }
-}
-
-func (t *Tree) Fill() *Tree {
- for i, c := range t.Children {
- c.Parent = t
- c.Offset = i
- c.Fill()
- }
- return t
-}
-
-func (t *Tree) path() string {
- if t.Parent == nil {
- return t.Kind
- }
- return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind)
-}
-
-func (t *Tree) Format() []byte {
- var b bytes.Buffer
- t.format(&b, 0)
- return b.Bytes()
-}
-
-func (t *Tree) format(buf *bytes.Buffer, depth int) {
- for i := 0; i < depth; i++ {
- buf.WriteString(" ")
- }
- buf.WriteString("(")
- buf.WriteString(t.Kind)
- if len(t.Children) > 0 {
- buf.WriteString("\n")
- for i, c := range t.Children {
- c.format(buf, depth+1)
- if i < len(t.Children)-1 {
- buf.WriteString("\n")
- }
- }
- }
- buf.WriteString(")")
-}
-
-func DiffTree(expected, actual *Tree) []*TreeDiff {
- if expected == nil && actual == nil {
- return nil
- }
- if actual.Kind != expected.Kind {
- msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind)
- return []*TreeDiff{
- newTreeDiff(expected, actual, msg),
- }
- }
- if expected.Lexeme != actual.Lexeme {
- msg := fmt.Sprintf("unexpected lexeme: expected '%v' but got '%v'", expected.Lexeme, actual.Lexeme)
- return []*TreeDiff{
- newTreeDiff(expected, actual, msg),
- }
- }
- if len(actual.Children) != len(expected.Children) {
- msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children))
- return []*TreeDiff{
- newTreeDiff(expected, actual, msg),
- }
- }
- var diffs []*TreeDiff
- for i, exp := range expected.Children {
- if ds := DiffTree(exp, actual.Children[i]); len(ds) > 0 {
- diffs = append(diffs, ds...)
- }
- }
- return diffs
-}
-
-type TestCase struct {
- Description string
- Source []byte
- Output *Tree
-}
-
-func ParseTestCase(r io.Reader) (*TestCase, error) {
- parts, err := splitIntoParts(r)
- if err != nil {
- return nil, err
- }
- if len(parts) != 3 {
- return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(parts))
- }
-
- tp := &treeParser{
- lineOffset: parts[0].lineCount + parts[1].lineCount + 2,
- }
- tree, err := tp.parseTree(bytes.NewReader(parts[2].buf))
- if err != nil {
- return nil, err
- }
-
- return &TestCase{
- Description: string(parts[0].buf),
- Source: parts[1].buf,
- Output: tree,
- }, nil
-}
-
-type testCasePart struct {
- buf []byte
- lineCount int
-}
-
-func splitIntoParts(r io.Reader) ([]*testCasePart, error) {
- var bufs []*testCasePart
- s := bufio.NewScanner(r)
- for {
- buf, lineCount, err := readPart(s)
- if err != nil {
- return nil, err
- }
- if buf == nil {
- break
- }
- bufs = append(bufs, &testCasePart{
- buf: buf,
- lineCount: lineCount,
- })
- }
- if err := s.Err(); err != nil {
- return nil, err
- }
- return bufs, nil
-}
-
-var reDelim = regexp.MustCompile(`^\s*---+\s*$`)
-
-func readPart(s *bufio.Scanner) ([]byte, int, error) {
- if !s.Scan() {
- return nil, 0, s.Err()
- }
- buf := &bytes.Buffer{}
- line := s.Bytes()
- if reDelim.Match(line) {
- // Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data.
- return []byte{}, 0, nil
- }
- _, err := buf.Write(line)
- if err != nil {
- return nil, 0, err
- }
- lineCount := 1
- for s.Scan() {
- line := s.Bytes()
- if reDelim.Match(line) {
- return buf.Bytes(), lineCount, nil
- }
- _, err := buf.Write([]byte("\n"))
- if err != nil {
- return nil, 0, err
- }
- _, err = buf.Write(line)
- if err != nil {
- return nil, 0, err
- }
- lineCount++
- }
- if err := s.Err(); err != nil {
- return nil, 0, err
- }
- return buf.Bytes(), lineCount, nil
-}
-
-type treeParser struct {
- lineOffset int
-}
-
-func (tp *treeParser) parseTree(src io.Reader) (*Tree, error) {
- toks, err := NewTokenStream(src)
- if err != nil {
- return nil, err
- }
- gram := NewGrammar()
- tb := NewDefaultSyntaxTreeBuilder()
- p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb)))
- if err != nil {
- return nil, err
- }
- err = p.Parse()
- if err != nil {
- return nil, err
- }
- synErrs := p.SyntaxErrors()
- if len(synErrs) > 0 {
- var b strings.Builder
- b.Write(formatSyntaxError(synErrs[0], gram, tp.lineOffset))
- for _, synErr := range synErrs[1:] {
- b.WriteRune('\n')
- b.Write(formatSyntaxError(synErr, gram, tp.lineOffset))
- }
- return nil, errors.New(b.String())
- }
- t, err := tp.genTree(tb.Tree())
- if err != nil {
- return nil, err
- }
- return t.Fill(), nil
-}
-
-func formatSyntaxError(synErr *SyntaxError, gram Grammar, lineOffset int) []byte {
- var b bytes.Buffer
-
- b.WriteString(fmt.Sprintf("%v:%v: %v: ", lineOffset+synErr.Row+1, synErr.Col+1, synErr.Message))
-
- tok := synErr.Token
- switch {
- case tok.EOF():
- b.WriteString("<eof>")
- case tok.Invalid():
- b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme())))
- default:
- if term := gram.Terminal(tok.TerminalID()); term != "" {
- b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term))
- } else {
- b.WriteString(fmt.Sprintf("'%v'", string(tok.Lexeme())))
- }
- }
- b.WriteString(fmt.Sprintf(": expected: %v", synErr.ExpectedTerminals[0]))
- for _, t := range synErr.ExpectedTerminals[1:] {
- b.WriteString(fmt.Sprintf(", %v", t))
- }
-
- return b.Bytes()
-}
-
-func (tp *treeParser) genTree(node *Node) (*Tree, error) {
- // A node labeled 'error' cannot have children. It always must be (error).
- if sym := node.Children[0]; sym.Text == "error" {
- if len(node.Children) > 1 {
- return nil, fmt.Errorf("%v:%v: error node cannot take children", tp.lineOffset+sym.Row+1, sym.Col+1)
- }
- return NewTerminalNode(sym.Text, ""), nil
- }
-
- if len(node.Children) == 2 && node.Children[1].KindName == "string" {
- var text string
- str := node.Children[1].Children[0]
- switch str.KindName {
- case "raw_string":
- text = str.Children[0].Text
- case "interpreted_string":
- var b strings.Builder
- for _, c := range str.Children {
- switch c.KindName {
- case "escaped_seq":
- b.WriteString(strings.TrimPrefix(`\`, c.Text))
- case "escape_char":
- return nil, fmt.Errorf("%v:%v: incomplete escape sequence", tp.lineOffset+c.Row+1, c.Col+1)
- case "codepoint_expr":
- cp := c.Children[0]
- n, err := strconv.ParseInt(cp.Text, 16, 64)
- if err != nil {
- return nil, fmt.Errorf("%v:%v: %v", tp.lineOffset+cp.Row+1, cp.Col+1, err)
- }
- if !utf8.ValidRune(rune(n)) {
- return nil, fmt.Errorf("%v:%v: invalid code point: %v", tp.lineOffset+cp.Row+1, cp.Col+1, cp.Text)
- }
- b.WriteRune(rune(n))
- default:
- b.WriteString(c.Text)
- }
- }
- text = b.String()
- }
- return NewTerminalNode(node.Children[0].Text, text), nil
- }
-
- var children []*Tree
- if len(node.Children) > 1 {
- children = make([]*Tree, len(node.Children)-1)
- for i, c := range node.Children[1:] {
- var err error
- children[i], err = tp.genTree(c)
- if err != nil {
- return nil, err
- }
- }
- }
- return NewNonTerminalTree(node.Children[0].Text, children...), nil
-}
diff --git a/src/urubu/spec/test/tree_parser.go b/src/urubu/spec/test/tree_parser.go
deleted file mode 100644
index 528d259..0000000
--- a/src/urubu/spec/test/tree_parser.go
+++ /dev/null
@@ -1,716 +0,0 @@
-// Code generated by vartan-go. DO NOT EDIT.
-package test
-
-import (
- "fmt"
- "io"
-)
-
-type Grammar interface {
- // InitialState returns the initial state of a parser.
- InitialState() int
-
- // StartProduction returns the start production of grammar.
- StartProduction() int
-
- // Action returns an ACTION entry corresponding to a (state, terminal symbol) pair.
- Action(state int, terminal int) int
-
- // GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair.
- GoTo(state int, lhs int) int
-
- // ErrorTrapperState returns true when a state can shift the error symbol.
- ErrorTrapperState(state int) bool
-
- // LHS returns a LHS symbol of a production.
- LHS(prod int) int
-
- // AlternativeSymbolCount returns a symbol count of p production.
- AlternativeSymbolCount(prod int) int
-
- // RecoverProduction returns true when a production has the recover directive.
- RecoverProduction(prod int) bool
-
- // NonTerminal retuns a string representaion of a non-terminal symbol.
- NonTerminal(nonTerminal int) string
-
- // TerminalCount returns a terminal symbol count of grammar.
- TerminalCount() int
-
- // SkipTerminal returns true when a terminal symbol must be skipped on syntax analysis.
- SkipTerminal(terminal int) bool
-
- // EOF returns the EOF symbol.
- EOF() int
-
- // Error returns the error symbol.
- Error() int
-
- // Terminal retuns a string representaion of a terminal symbol.
- Terminal(terminal int) string
-
- // ASTAction returns an AST action entries.
- ASTAction(prod int) []int
-}
-
-type VToken interface {
- // TerminalID returns a terminal ID.
- TerminalID() int
-
- // Lexeme returns a lexeme.
- Lexeme() []byte
-
- // EOF returns true when a token represents EOF.
- EOF() bool
-
- // Invalid returns true when a token is invalid.
- Invalid() bool
-
- // Position returns (row, column) pair.
- Position() (int, int)
-}
-
-type TokenStream interface {
- Next() (VToken, error)
-}
-
-type SyntaxError struct {
- Row int
- Col int
- Message string
- Token VToken
- ExpectedTerminals []string
-}
-
-type ParserOption func(p *Parser) error
-
-// DisableLAC disables LAC (lookahead correction). LAC is enabled by default.
-func DisableLAC() ParserOption {
- return func(p *Parser) error {
- p.disableLAC = true
- return nil
- }
-}
-
-func SemanticAction(semAct SemanticActionSet) ParserOption {
- return func(p *Parser) error {
- p.semAct = semAct
- return nil
- }
-}
-
-type Parser struct {
- toks TokenStream
- gram Grammar
- stateStack *stateStack
- semAct SemanticActionSet
- disableLAC bool
- onError bool
- shiftCount int
- synErrs []*SyntaxError
-}
-
-func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) {
- p := &Parser{
- toks: toks,
- gram: gram,
- stateStack: &stateStack{},
- }
-
- for _, opt := range opts {
- err := opt(p)
- if err != nil {
- return nil, err
- }
- }
-
- return p, nil
-}
-
-func (p *Parser) Parse() error {
- p.stateStack.push(p.gram.InitialState())
- tok, err := p.nextToken()
- if err != nil {
- return err
- }
-
-ACTION_LOOP:
- for {
- act := p.lookupAction(tok)
-
- switch {
- case act < 0: // Shift
- nextState := act * -1
-
- recovered := false
- if p.onError {
- p.shiftCount++
-
- // When the parser performs shift three times, the parser recovers from the error state.
- if p.shiftCount >= 3 {
- p.onError = false
- p.shiftCount = 0
- recovered = true
- }
- }
-
- p.shift(nextState)
-
- if p.semAct != nil {
- p.semAct.Shift(tok, recovered)
- }
-
- tok, err = p.nextToken()
- if err != nil {
- return err
- }
- case act > 0: // Reduce
- prodNum := act
-
- recovered := false
- if p.onError && p.gram.RecoverProduction(prodNum) {
- p.onError = false
- p.shiftCount = 0
- recovered = true
- }
-
- accepted := p.reduce(prodNum)
- if accepted {
- if p.semAct != nil {
- p.semAct.Accept()
- }
-
- return nil
- }
-
- if p.semAct != nil {
- p.semAct.Reduce(prodNum, recovered)
- }
- default: // Error
- if p.onError {
- tok, err = p.nextToken()
- if err != nil {
- return err
- }
- if tok.EOF() {
- if p.semAct != nil {
- p.semAct.MissError(tok)
- }
-
- return nil
- }
-
- continue ACTION_LOOP
- }
-
- row, col := tok.Position()
- p.synErrs = append(p.synErrs, &SyntaxError{
- Row: row,
- Col: col,
- Message: "unexpected token",
- Token: tok,
- ExpectedTerminals: p.searchLookahead(p.stateStack.top()),
- })
-
- count, ok := p.trapError()
- if !ok {
- if p.semAct != nil {
- p.semAct.MissError(tok)
- }
-
- return nil
- }
-
- p.onError = true
- p.shiftCount = 0
-
- act, err := p.lookupActionOnError()
- if err != nil {
- return err
- }
-
- p.shift(act * -1)
-
- if p.semAct != nil {
- p.semAct.TrapAndShiftError(tok, count)
- }
- }
- }
-}
-
-// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid,
-// this method returns `true`.
-func (p *Parser) validateLookahead(term int) bool {
- p.stateStack.enableExploratoryMode()
- defer p.stateStack.disableExploratoryMode()
-
- for {
- act := p.gram.Action(p.stateStack.topExploratorily(), term)
-
- switch {
- case act < 0: // Shift
- return true
- case act > 0: // Reduce
- prodNum := act
-
- lhs := p.gram.LHS(prodNum)
- if lhs == p.gram.LHS(p.gram.StartProduction()) {
- return true
- }
- n := p.gram.AlternativeSymbolCount(prodNum)
- p.stateStack.popExploratorily(n)
- state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs)
- p.stateStack.pushExploratorily(state)
- default: // Error
- return false
- }
- }
-}
-
-func (p *Parser) nextToken() (VToken, error) {
- for {
- // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0,
- // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect
- // a syntax error because the parser cannot find an entry corresponding to the invalid token.
- tok, err := p.toks.Next()
- if err != nil {
- return nil, err
- }
-
- if p.gram.SkipTerminal(tok.TerminalID()) {
- continue
- }
-
- return tok, nil
- }
-}
-
-func (p *Parser) tokenToTerminal(tok VToken) int {
- if tok.EOF() {
- return p.gram.EOF()
- }
-
- return tok.TerminalID()
-}
-
-func (p *Parser) lookupAction(tok VToken) int {
- if !p.disableLAC {
- term := p.tokenToTerminal(tok)
- if !p.validateLookahead(term) {
- return 0
- }
- }
-
- return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok))
-}
-
-func (p *Parser) lookupActionOnError() (int, error) {
- act := p.gram.Action(p.stateStack.top(), p.gram.Error())
- if act >= 0 {
- return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error()))
- }
-
- return act, nil
-}
-
-func (p *Parser) shift(nextState int) {
- p.stateStack.push(nextState)
-}
-
-func (p *Parser) reduce(prodNum int) bool {
- lhs := p.gram.LHS(prodNum)
- if lhs == p.gram.LHS(p.gram.StartProduction()) {
- return true
- }
- n := p.gram.AlternativeSymbolCount(prodNum)
- p.stateStack.pop(n)
- nextState := p.gram.GoTo(p.stateStack.top(), lhs)
- p.stateStack.push(nextState)
- return false
-}
-
-func (p *Parser) trapError() (int, bool) {
- count := 0
- for {
- if p.gram.ErrorTrapperState(p.stateStack.top()) {
- return count, true
- }
-
- if p.stateStack.top() != p.gram.InitialState() {
- p.stateStack.pop(1)
- count++
- } else {
- return 0, false
- }
- }
-}
-
-func (p *Parser) SyntaxErrors() []*SyntaxError {
- return p.synErrs
-}
-
-func (p *Parser) searchLookahead(state int) []string {
- kinds := []string{}
- termCount := p.gram.TerminalCount()
- for term := 0; term < termCount; term++ {
- if p.disableLAC {
- if p.gram.Action(p.stateStack.top(), term) == 0 {
- continue
- }
- } else {
- if !p.validateLookahead(term) {
- continue
- }
- }
-
- // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol
- // intentionally.
- if term == p.gram.Error() {
- continue
- }
-
- kinds = append(kinds, p.gram.Terminal(term))
- }
-
- return kinds
-}
-
-type stateStack struct {
- items []int
- itemsExp []int
-}
-
-func (s *stateStack) enableExploratoryMode() {
- s.itemsExp = make([]int, len(s.items))
- copy(s.itemsExp, s.items)
-}
-
-func (s *stateStack) disableExploratoryMode() {
- s.itemsExp = nil
-}
-
-func (s *stateStack) top() int {
- return s.items[len(s.items)-1]
-}
-
-func (s *stateStack) topExploratorily() int {
- return s.itemsExp[len(s.itemsExp)-1]
-}
-
-func (s *stateStack) push(state int) {
- s.items = append(s.items, state)
-}
-
-func (s *stateStack) pushExploratorily(state int) {
- s.itemsExp = append(s.itemsExp, state)
-}
-
-func (s *stateStack) pop(n int) {
- s.items = s.items[:len(s.items)-n]
-}
-
-func (s *stateStack) popExploratorily(n int) {
- s.itemsExp = s.itemsExp[:len(s.itemsExp)-n]
-}
-
-type grammarImpl struct {
- recoverProductions []int
- action []int
- goTo []int
- alternativeSymbolCounts []int
- errorTrapperStates []int
- nonTerminals []string
- lhsSymbols []int
- terminals []string
- terminalSkip []int
- astActions [][]int
-}
-
-func NewGrammar() *grammarImpl {
- return &grammarImpl{
- recoverProductions: []int{
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0,
- },
- action: []int{
- 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- -3, 0, 0, 0, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, -5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0,
- -2, 7, 0, -11, 0, 0, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4,
- 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -14, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 12, 12, 0, 0, -17, 12, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -22, 0, 16, 16, 0, 0, 0, 0, 0, -23,
- -24, -25, -26, -27, -28, -29, 16, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -30, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- -31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -23, -24, -25, -26, -27, -28, -29, 15,
- 0, 18, 0, 0, 18, 18, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 0,
- 25, 0, 0, 25, 25, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -33, 0, 19, 0,
- 0, 19, 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, -34, 0, 0, 0, 0, 0, 0, 20, 0, 0, 20,
- 20, 0, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, 20, 0, 21, 0, 0, 21, 21,
- 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 0, 22, 0, 0, 22, 22, 0,
- 0, 0, 0, 0, 22, 22, 22, 22, 22, 22, 22, 22, 0, 23, 0, 0, 23, 23, 0, 0,
- 0, 0, 0, 23, 23, 23, 23, 23, 23, 23, 23, 0, 24, 0, 0, 24, 24, 0, 0, 0,
- 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 0, 10, 0, 0, 10, 10, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 13, 13, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 17, 17, 0, 0, 0, 0, 0, 17,
- 17, 17, 17, 17, 17, 17, 17, 0, 14, 0, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, -35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -36,
- 0, 0, 0, 0, 0, 26, 0, 0, 26, 26, 0, 0, 0, 0, 0, 26, 26, 26, 26, 26,
- 26, 26, 26,
- },
- goTo: []int{
- 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 0, 10, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 21, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 21,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0,
- },
- alternativeSymbolCounts: []int{
- 0, 1, 4, 4, 3, 2, 1, 0, 1, 1, 3, 1, 0, 3, 3, 1, 0, 2, 1, 1,
- 1, 1, 1, 1, 1, 1, 4,
- },
- errorTrapperStates: []int{
- 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- nonTerminals: []string{
- "",
- "tree'",
- "tree",
- "tree_list",
- "string",
- "raw_string",
- "opt_raw_string_body",
- "interpreted_string",
- "opt_interpreted_string_body",
- "interpreted_string_body",
- "interpreted_string_elem",
- "codepoint_expr",
- },
- lhsSymbols: []int{
- 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
- 10, 10, 10, 10, 10, 10, 11,
- },
- terminals: []string{
- "",
- "<eof>",
- "error",
- "ws",
- "l_paren",
- "r_paren",
- "identifier",
- "raw_string_open",
- "raw_string_body",
- "raw_string_close",
- "interpreted_string_open",
- "interpreted_seq",
- "codepoint_prefix",
- "l_brace",
- "r_brace",
- "hex_digits",
- "escaped_seq",
- "escape_char",
- "interpreted_string_close",
- },
- terminalSkip: []int{
- 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- },
- astActions: [][]int{
- nil,
- nil,
- {
- 2, -3,
- },
- {
- 2, 3,
- },
- {
- 2,
- },
- {
- -1, 2,
- },
- nil,
- nil,
- nil,
- nil,
- {
- -2,
- },
- nil,
- nil,
- {
- -2,
- },
- {
- 2,
- },
- {
- -1,
- },
- nil,
- {
- -1, -2,
- },
- {
- -1,
- },
- nil,
- nil,
- nil,
- nil,
- nil,
- nil,
- nil,
- {
- 3,
- },
- },
- }
-}
-
-func (g *grammarImpl) InitialState() int {
- return 0
-}
-
-func (g *grammarImpl) StartProduction() int {
- return 1
-}
-
-func (g *grammarImpl) RecoverProduction(prod int) bool {
- return g.recoverProductions[prod] != 0
-}
-
-func (g *grammarImpl) Action(state int, terminal int) int {
- return g.action[state*19+terminal]
-}
-
-func (g *grammarImpl) GoTo(state int, lhs int) int {
- return g.goTo[state*12+lhs]
-}
-
-func (g *grammarImpl) AlternativeSymbolCount(prod int) int {
- return g.alternativeSymbolCounts[prod]
-}
-
-func (g *grammarImpl) TerminalCount() int {
- return 19
-}
-
-func (g *grammarImpl) SkipTerminal(terminal int) bool {
- return g.terminalSkip[terminal] == 1
-}
-
-func (g *grammarImpl) ErrorTrapperState(state int) bool {
- return g.errorTrapperStates[state] != 0
-}
-
-func (g *grammarImpl) NonTerminal(nonTerminal int) string {
- return g.nonTerminals[nonTerminal]
-}
-
-func (g *grammarImpl) LHS(prod int) int {
- return g.lhsSymbols[prod]
-}
-
-func (g *grammarImpl) EOF() int {
- return 1
-}
-
-func (g *grammarImpl) Error() int {
- return 2
-}
-
-func (g *grammarImpl) Terminal(terminal int) string {
- return g.terminals[terminal]
-}
-
-func (g *grammarImpl) ASTAction(prod int) []int {
- return g.astActions[prod]
-}
-
-type vToken struct {
- terminalID int
- tok *Token
-}
-
-func (t *vToken) TerminalID() int {
- return t.terminalID
-}
-
-func (t *vToken) Lexeme() []byte {
- return t.tok.Lexeme
-}
-
-func (t *vToken) EOF() bool {
- return t.tok.EOF
-}
-
-func (t *vToken) Invalid() bool {
- return t.tok.Invalid
-}
-
-func (t *vToken) Position() (int, int) {
- return t.tok.Row, t.tok.Col
-}
-
-var kindToTerminal = []int{
- 0, 3, 4, 5, 6, 7, 10, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18,
-}
-
-type tokenStream struct {
- lex *Lexer
- kindToTerminal []int
-}
-
-func NewTokenStream(src io.Reader) (*tokenStream, error) {
- lex, err := NewLexer(NewLexSpec(), src)
- if err != nil {
- return nil, err
- }
-
- return &tokenStream{
- lex: lex,
- }, nil
-}
-
-func (t *tokenStream) Next() (VToken, error) {
- tok, err := t.lex.Next()
- if err != nil {
- return nil, err
- }
- return &vToken{
- terminalID: kindToTerminal[tok.KindID],
- tok: tok,
- }, nil
-}
diff --git a/src/urubu/spec/test/tree_semantic_action.go b/src/urubu/spec/test/tree_semantic_action.go
deleted file mode 100644
index c1d5a25..0000000
--- a/src/urubu/spec/test/tree_semantic_action.go
+++ /dev/null
@@ -1,367 +0,0 @@
-// Code generated by vartan-go. DO NOT EDIT.
-package test
-
-import (
- "encoding/json"
- "fmt"
- "io"
- "strconv"
-)
-
-// SemanticActionSet is a set of semantic actions a parser calls.
-type SemanticActionSet interface {
- // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol.
- // When the parser recovered from an error state by shifting the token, `recovered` is true.
- Shift(tok VToken, recovered bool)
-
- // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production.
- // When the parser recovered from an error state by reducing the production, `recovered` is true.
- Reduce(prodNum int, recovered bool)
-
- // Accept runs when the parser accepts an input.
- Accept()
-
- // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack.
- // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards
- // from the state stack.
- // Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token
- // corresponding to the error symbol doesn't exist.
- TrapAndShiftError(cause VToken, popped int)
-
- // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error.
- MissError(cause VToken)
-}
-
-var _ SemanticActionSet = &SyntaxTreeActionSet{}
-
-// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface.
-type SyntaxTreeNode interface {
- // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast`
- // directive with `...` operator.
- ChildCount() int
-
- // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast`
- // directive with `...` operator.
- ExpandChildren() []SyntaxTreeNode
-}
-
-var _ SyntaxTreeNode = &Node{}
-
-// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types.
-// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface.
-type SyntaxTreeBuilder interface {
- Shift(kindName string, text string, row, col int) SyntaxTreeNode
- ShiftError(kindName string) SyntaxTreeNode
- Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode
- Accept(f SyntaxTreeNode)
-}
-
-var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{}
-
-// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder.
-type DefaulSyntaxTreeBuilder struct {
- tree *Node
-}
-
-// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder.
-func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder {
- return &DefaulSyntaxTreeBuilder{}
-}
-
-// Shift is a implementation of SyntaxTreeBuilder.Shift.
-func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode {
- return &Node{
- Type: NodeTypeTerminal,
- KindName: kindName,
- Text: text,
- Row: row,
- Col: col,
- }
-}
-
-// ShiftError is a implementation of SyntaxTreeBuilder.ShiftError.
-func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode {
- return &Node{
- Type: NodeTypeError,
- KindName: kindName,
- }
-}
-
-// Reduce is a implementation of SyntaxTreeBuilder.Reduce.
-func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode {
- cNodes := make([]*Node, len(children))
- for i, c := range children {
- cNodes[i] = c.(*Node)
- }
- return &Node{
- Type: NodeTypeNonTerminal,
- KindName: kindName,
- Children: cNodes,
- }
-}
-
-// Accept is a implementation of SyntaxTreeBuilder.Accept.
-func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) {
- b.tree = f.(*Node)
-}
-
-// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil.
-func (b *DefaulSyntaxTreeBuilder) Tree() *Node {
- return b.tree
-}
-
-// SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree.
-type SyntaxTreeActionSet struct {
- gram Grammar
- builder SyntaxTreeBuilder
- semStack *semanticStack
- disableASTAction bool
-}
-
-// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree).
-// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them.
-func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
- return &SyntaxTreeActionSet{
- gram: gram,
- builder: builder,
- semStack: newSemanticStack(),
- }
-}
-
-// NewCSTTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree).
-// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them.
-func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet {
- return &SyntaxTreeActionSet{
- gram: gram,
- builder: builder,
- semStack: newSemanticStack(),
- disableASTAction: true,
- }
-}
-
-// Shift is a implementation of SemanticActionSet.Shift method.
-func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) {
- term := a.tokenToTerminal(tok)
- row, col := tok.Position()
- a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col))
-}
-
-// Reduce is a implementation of SemanticActionSet.Reduce method.
-func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) {
- lhs := a.gram.LHS(prodNum)
-
- // When an alternative is empty, `n` will be 0, and `handle` will be empty slice.
- n := a.gram.AlternativeSymbolCount(prodNum)
- handle := a.semStack.pop(n)
-
- var astAct []int
- if !a.disableASTAction {
- astAct = a.gram.ASTAction(prodNum)
- }
- var children []SyntaxTreeNode
- if astAct != nil {
- // Count the number of children in advance to avoid frequent growth in a slice for children.
- {
- l := 0
- for _, e := range astAct {
- if e > 0 {
- l++
- } else {
- offset := e*-1 - 1
- l += handle[offset].ChildCount()
- }
- }
-
- children = make([]SyntaxTreeNode, l)
- }
-
- p := 0
- for _, e := range astAct {
- if e > 0 {
- offset := e - 1
- children[p] = handle[offset]
- p++
- } else {
- offset := e*-1 - 1
- for _, c := range handle[offset].ExpandChildren() {
- children[p] = c
- p++
- }
- }
- }
- } else {
- // If an alternative has no AST action, a driver generates
- // a node with the same structure as a CST.
- children = handle
- }
-
- a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children))
-}
-
-// Accept is a implementation of SemanticActionSet.Accept method.
-func (a *SyntaxTreeActionSet) Accept() {
- top := a.semStack.pop(1)
- a.builder.Accept(top[0])
-}
-
-// TrapAndShiftError is a implementation of SemanticActionSet.TrapAndShiftError method.
-func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) {
- a.semStack.pop(popped)
- a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error())))
-}
-
-// MissError is a implementation of SemanticActionSet.MissError method.
-func (a *SyntaxTreeActionSet) MissError(cause VToken) {
-}
-
-func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int {
- if tok.EOF() {
- return a.gram.EOF()
- }
-
- return tok.TerminalID()
-}
-
-type semanticStack struct {
- frames []SyntaxTreeNode
-}
-
-func newSemanticStack() *semanticStack {
- return &semanticStack{
- frames: make([]SyntaxTreeNode, 0, 100),
- }
-}
-
-func (s *semanticStack) push(f SyntaxTreeNode) {
- s.frames = append(s.frames, f)
-}
-
-func (s *semanticStack) pop(n int) []SyntaxTreeNode {
- fs := s.frames[len(s.frames)-n:]
- s.frames = s.frames[:len(s.frames)-n]
-
- return fs
-}
-
-type NodeType int
-
-const (
- NodeTypeError = 0
- NodeTypeTerminal = 1
- NodeTypeNonTerminal = 2
-)
-
-// Node is a implementation of SyntaxTreeNode interface.
-type Node struct {
- Type NodeType
- KindName string
- Text string
- Row int
- Col int
- Children []*Node
-}
-
-func (n *Node) MarshalJSON() ([]byte, error) {
- switch n.Type {
- case NodeTypeError:
- return json.Marshal(struct {
- Type NodeType `json:"type"`
- KindName string `json:"kind_name"`
- }{
- Type: n.Type,
- KindName: n.KindName,
- })
- case NodeTypeTerminal:
- if n.KindName == "" {
- return json.Marshal(struct {
- Type NodeType `json:"type"`
- Text string `json:"text"`
- Row int `json:"row"`
- Col int `json:"col"`
- }{
- Type: n.Type,
- Text: n.Text,
- Row: n.Row,
- Col: n.Col,
- })
- }
- return json.Marshal(struct {
- Type NodeType `json:"type"`
- KindName string `json:"kind_name"`
- Text string `json:"text"`
- Row int `json:"row"`
- Col int `json:"col"`
- }{
- Type: n.Type,
- KindName: n.KindName,
- Text: n.Text,
- Row: n.Row,
- Col: n.Col,
- })
- case NodeTypeNonTerminal:
- return json.Marshal(struct {
- Type NodeType `json:"type"`
- KindName string `json:"kind_name"`
- Children []*Node `json:"children"`
- }{
- Type: n.Type,
- KindName: n.KindName,
- Children: n.Children,
- })
- default:
- return nil, fmt.Errorf("invalid node type: %v", n.Type)
- }
-}
-
-// ChildCount is a implementation of SyntaxTreeNode.ChildCount.
-func (n *Node) ChildCount() int {
- return len(n.Children)
-}
-
-// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren.
-func (n *Node) ExpandChildren() []SyntaxTreeNode {
- fs := make([]SyntaxTreeNode, len(n.Children))
- for i, n := range n.Children {
- fs[i] = n
- }
- return fs
-}
-
-// PrintTree prints a syntax tree whose root is `node`.
-func PrintTree(w io.Writer, node *Node) {
- printTree(w, node, "", "")
-}
-
-func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) {
- if node == nil {
- return
- }
-
- switch node.Type {
- case NodeTypeError:
- fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
- case NodeTypeTerminal:
- fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text))
- case NodeTypeNonTerminal:
- fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName)
-
- num := len(node.Children)
- for i, child := range node.Children {
- var line string
- if num > 1 && i < num-1 {
- line = "├─ "
- } else {
- line = "└─ "
- }
-
- var prefix string
- if i >= num-1 {
- prefix = " "
- } else {
- prefix = "│ "
- }
-
- printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix)
- }
- }
-}
diff --git a/src/urubu/spec/test/tree-report.json b/src/urubu/spec/tree-report.json
index c2018e5..c2018e5 100644
--- a/src/urubu/spec/test/tree-report.json
+++ b/src/urubu/spec/tree-report.json
diff --git a/src/urubu/spec/test/tree.json b/src/urubu/spec/tree.json
index f05c2f2..f05c2f2 100644
--- a/src/urubu/spec/test/tree.json
+++ b/src/urubu/spec/tree.json
diff --git a/src/urubu/spec/test/tree.vartan b/src/urubu/spec/tree.vartan
index aa8f733..aa8f733 100644
--- a/src/urubu/spec/test/tree.vartan
+++ b/src/urubu/spec/tree.vartan