diff options
Diffstat (limited to 'src/urubu/spec')
-rw-r--r-- | src/urubu/spec/grammar.go (renamed from src/urubu/spec/grammar/grammar.go) | 93 | ||||
-rw-r--r-- | src/urubu/spec/grammar/clexspec.json (renamed from src/urubu/spec/grammar/parser/clexspec.json) | 0 | ||||
-rw-r--r-- | src/urubu/spec/grammar/description.go | 71 | ||||
-rw-r--r-- | src/urubu/spec/grammar/lexspec.json (renamed from src/urubu/spec/grammar/parser/lexspec.json) | 0 | ||||
-rw-r--r-- | src/urubu/spec/grammar/parser.go (renamed from src/urubu/spec/grammar/parser/vartan_lexer.go) | 911 | ||||
-rw-r--r-- | src/urubu/spec/grammar/parser/lexer.go | 297 | ||||
-rw-r--r-- | src/urubu/spec/grammar/parser/parser.go | 582 | ||||
-rw-r--r-- | src/urubu/spec/grammar/parser/syntax_error.go | 45 | ||||
-rw-r--r-- | src/urubu/spec/grammar/util.go | 21 | ||||
-rw-r--r-- | src/urubu/spec/test.go (renamed from src/urubu/spec/test/tree_lexer.go) | 1402 | ||||
-rw-r--r-- | src/urubu/spec/test/parser.go | 336 | ||||
-rw-r--r-- | src/urubu/spec/test/tree_parser.go | 716 | ||||
-rw-r--r-- | src/urubu/spec/test/tree_semantic_action.go | 367 | ||||
-rw-r--r-- | src/urubu/spec/tree-report.json (renamed from src/urubu/spec/test/tree-report.json) | 0 | ||||
-rw-r--r-- | src/urubu/spec/tree.json (renamed from src/urubu/spec/test/tree.json) | 0 | ||||
-rw-r--r-- | src/urubu/spec/tree.vartan (renamed from src/urubu/spec/test/tree.vartan) | 0 |
16 files changed, 2403 insertions, 2438 deletions
diff --git a/src/urubu/spec/grammar/grammar.go b/src/urubu/spec/grammar.go index bf1ea89..c2708d8 100644 --- a/src/urubu/spec/grammar/grammar.go +++ b/src/urubu/spec/grammar.go @@ -1,6 +1,79 @@ package grammar -import "strconv" +import ( + "strconv" + "strings" +) + +type Terminal struct { + Number int `json:"number"` + Name string `json:"name"` + Pattern string `json:"pattern"` + Precedence int `json:"prec"` + Associativity string `json:"assoc"` +} + +type NonTerminal struct { + Number int `json:"number"` + Name string `json:"name"` +} + +type Production struct { + Number int `json:"number"` + LHS int `json:"lhs"` + RHS []int `json:"rhs"` + Precedence int `json:"prec"` + Associativity string `json:"assoc"` +} + +type Item struct { + Production int `json:"production"` + Dot int `json:"dot"` +} + +type Transition struct { + Symbol int `json:"symbol"` + State int `json:"state"` +} + +type Reduce struct { + LookAhead []int `json:"look_ahead"` + Production int `json:"production"` +} + +type SRConflict struct { + Symbol int `json:"symbol"` + State int `json:"state"` + Production int `json:"production"` + AdoptedState *int `json:"adopted_state"` + AdoptedProduction *int `json:"adopted_production"` + ResolvedBy int `json:"resolved_by"` +} + +type RRConflict struct { + Symbol int `json:"symbol"` + Production1 int `json:"production_1"` + Production2 int `json:"production_2"` + AdoptedProduction int `json:"adopted_production"` + ResolvedBy int `json:"resolved_by"` +} + +type State struct { + Number int `json:"number"` + Kernel []*Item `json:"kernel"` + Shift []*Transition `json:"shift"` + Reduce []*Reduce `json:"reduce"` + GoTo []*Transition `json:"goto"` + SRConflict []*SRConflict `json:"sr_conflict"` + RRConflict []*RRConflict `json:"rr_conflict"` +} + +type Report struct { + Terminals []*Terminal `json:"terminals"` + NonTerminals []*NonTerminal `json:"non_terminals"` + Productions []*Production `json:"productions"` + States []*State `json:"states"` +} type CompiledGrammar struct { Name string `json:"name"` @@ -158,3 +231,21 @@ type SyntacticSpec struct { type ASTAction struct { Entries [][]int `json:"entries"` } + +var rep = strings.NewReplacer( + `.`, `\.`, + `*`, `\*`, + `+`, `\+`, + `?`, `\?`, + `|`, `\|`, + `(`, `\(`, + `)`, `\)`, + `[`, `\[`, + `\`, `\\`, +) + +// EscapePattern escapes the special characters. +// For example, EscapePattern(`+`) returns `\+`. +func EscapePattern(s string) string { + return rep.Replace(s) +} diff --git a/src/urubu/spec/grammar/parser/clexspec.json b/src/urubu/spec/grammar/clexspec.json index d0ed3d3..d0ed3d3 100644 --- a/src/urubu/spec/grammar/parser/clexspec.json +++ b/src/urubu/spec/grammar/clexspec.json diff --git a/src/urubu/spec/grammar/description.go b/src/urubu/spec/grammar/description.go deleted file mode 100644 index 0d2a0b7..0000000 --- a/src/urubu/spec/grammar/description.go +++ /dev/null @@ -1,71 +0,0 @@ -package grammar - -type Terminal struct { - Number int `json:"number"` - Name string `json:"name"` - Pattern string `json:"pattern"` - Precedence int `json:"prec"` - Associativity string `json:"assoc"` -} - -type NonTerminal struct { - Number int `json:"number"` - Name string `json:"name"` -} - -type Production struct { - Number int `json:"number"` - LHS int `json:"lhs"` - RHS []int `json:"rhs"` - Precedence int `json:"prec"` - Associativity string `json:"assoc"` -} - -type Item struct { - Production int `json:"production"` - Dot int `json:"dot"` -} - -type Transition struct { - Symbol int `json:"symbol"` - State int `json:"state"` -} - -type Reduce struct { - LookAhead []int `json:"look_ahead"` - Production int `json:"production"` -} - -type SRConflict struct { - Symbol int `json:"symbol"` - State int `json:"state"` - Production int `json:"production"` - AdoptedState *int `json:"adopted_state"` - AdoptedProduction *int `json:"adopted_production"` - ResolvedBy int `json:"resolved_by"` -} - -type RRConflict struct { - Symbol int `json:"symbol"` - Production1 int `json:"production_1"` - Production2 int `json:"production_2"` - AdoptedProduction int `json:"adopted_production"` - ResolvedBy int `json:"resolved_by"` -} - -type State struct { - Number int `json:"number"` - Kernel []*Item `json:"kernel"` - Shift []*Transition `json:"shift"` - Reduce []*Reduce `json:"reduce"` - GoTo []*Transition `json:"goto"` - SRConflict []*SRConflict `json:"sr_conflict"` - RRConflict []*RRConflict `json:"rr_conflict"` -} - -type Report struct { - Terminals []*Terminal `json:"terminals"` - NonTerminals []*NonTerminal `json:"non_terminals"` - Productions []*Production `json:"productions"` - States []*State `json:"states"` -} diff --git a/src/urubu/spec/grammar/parser/lexspec.json b/src/urubu/spec/grammar/lexspec.json index caf1f0e..caf1f0e 100644 --- a/src/urubu/spec/grammar/parser/lexspec.json +++ b/src/urubu/spec/grammar/lexspec.json diff --git a/src/urubu/spec/grammar/parser/vartan_lexer.go b/src/urubu/spec/grammar/parser.go index 76ddfde..0e5a16b 100644 --- a/src/urubu/spec/grammar/parser/vartan_lexer.go +++ b/src/urubu/spec/grammar/parser.go @@ -1,11 +1,920 @@ -// Code generated by maleeni-go. DO NOT EDIT. +//go:generate maleeni compile lexspec.json -o clexspec.json +//go:generate maleeni-go clexspec.json --package parser + package parser import ( + _ "embed" "fmt" "io" "io/ioutil" + "regexp" + "strings" + + verr "urubu/error" + spec "urubu/spec/grammar" +) + +type tokenKind string + +const ( + tokenKindKWFragment = tokenKind("fragment") + tokenKindID = tokenKind("id") + tokenKindTerminalPattern = tokenKind("terminal pattern") + tokenKindStringLiteral = tokenKind("string") + tokenKindColon = tokenKind(":") + tokenKindOr = tokenKind("|") + tokenKindSemicolon = tokenKind(";") + tokenKindLabelMarker = tokenKind("@") + tokenKindDirectiveMarker = tokenKind("#") + tokenKindExpantion = tokenKind("...") + tokenKindOrderedSymbolMarker = tokenKind("$") + tokenKindLParen = tokenKind("(") + tokenKindRParen = tokenKind(")") + tokenKindNewline = tokenKind("newline") + tokenKindEOF = tokenKind("eof") + tokenKindInvalid = tokenKind("invalid") +) + +var ( + reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`) + reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`) +) + +type Position struct { + Row int + Col int +} + +func newPosition(row, col int) Position { + return Position{ + Row: row, + Col: col, + } +} + +type token struct { + kind tokenKind + text string + pos Position +} + +func newSymbolToken(kind tokenKind, pos Position) *token { + return &token{ + kind: kind, + pos: pos, + } +} + +func newIDToken(text string, pos Position) *token { + return &token{ + kind: tokenKindID, + text: text, + pos: pos, + } +} + +func newTerminalPatternToken(text string, pos Position) *token { + return &token{ + kind: tokenKindTerminalPattern, + text: text, + pos: pos, + } +} + +func newStringLiteralToken(text string, pos Position) *token { + return &token{ + kind: tokenKindStringLiteral, + text: text, + pos: pos, + } +} + +func newEOFToken() *token { + return &token{ + kind: tokenKindEOF, + } +} + +func newInvalidToken(text string, pos Position) *token { + return &token{ + kind: tokenKindInvalid, + text: text, + pos: pos, + } +} + +type lexer struct { + d *Lexer + buf *token +} + +func newLexer(src io.Reader) (*lexer, error) { + d, err := NewLexer(NewLexSpec(), src) + if err != nil { + return nil, err + } + return &lexer{ + d: d, + }, nil +} + +func (l *lexer) next() (*token, error) { + if l.buf != nil { + tok := l.buf + l.buf = nil + return tok, nil + } + + var newline *token + for { + tok, err := l.lexAndSkipWSs() + if err != nil { + return nil, err + } + if tok.kind == tokenKindNewline { + newline = tok + continue + } + + if newline != nil { + l.buf = tok + return newline, nil + } + return tok, nil + } +} + +func (l *lexer) lexAndSkipWSs() (*token, error) { + var tok *Token + for { + var err error + tok, err = l.d.Next() + if err != nil { + return nil, err + } + if tok.Invalid { + return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil + } + if tok.EOF { + return newEOFToken(), nil + } + switch tok.KindID { + case KindIDWhiteSpace: + continue + case KindIDLineComment: + continue + } + + break + } + + switch tok.KindID { + case KindIDNewline: + return newSymbolToken(tokenKindNewline, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDKwFragment: + return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDIdentifier: + if !reIDChar.Match(tok.Lexeme) { + return nil, &verr.SpecError{ + Cause: synErrIDInvalidChar, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") { + return nil, &verr.SpecError{ + Cause: synErrIDInvalidUnderscorePos, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + if strings.Contains(string(tok.Lexeme), "__") { + return nil, &verr.SpecError{ + Cause: synErrIDConsecutiveUnderscores, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + if reIDInvalidDigitsPos.Match(tok.Lexeme) { + return nil, &verr.SpecError{ + Cause: synErrIDInvalidDigitsPos, + Detail: string(tok.Lexeme), + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + return newIDToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDTerminalOpen: + var b strings.Builder + for { + tok, err := l.d.Next() + if err != nil { + return nil, err + } + if tok.EOF { + return nil, &verr.SpecError{ + Cause: synErrUnclosedTerminal, + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + switch tok.KindID { + case KindIDPattern: + // The escape sequences in a pattern string are interpreted by the lexer, except for the \". + // We must interpret the \" before passing them to the lexer because they are delimiters for + // the pattern strings. + fmt.Fprint(&b, strings.ReplaceAll(string(tok.Lexeme), `\"`, `"`)) + case KindIDEscapeSymbol: + return nil, &verr.SpecError{ + Cause: synErrIncompletedEscSeq, + Row: tok.Row + 1, + Col: tok.Col + 1, + } + case KindIDTerminalClose: + pat := b.String() + if pat == "" { + return nil, &verr.SpecError{ + Cause: synErrEmptyPattern, + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + return newTerminalPatternToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil + } + } + case KindIDStringLiteralOpen: + var b strings.Builder + for { + tok, err := l.d.Next() + if err != nil { + return nil, err + } + if tok.EOF { + return nil, &verr.SpecError{ + Cause: synErrUnclosedString, + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + switch tok.KindID { + case KindIDCharSeq: + fmt.Fprint(&b, string(tok.Lexeme)) + case KindIDStringLiteralClose: + str := b.String() + if str == "" { + return nil, &verr.SpecError{ + Cause: synErrEmptyString, + Row: tok.Row + 1, + Col: tok.Col + 1, + } + } + return newStringLiteralToken(str, newPosition(tok.Row+1, tok.Col+1)), nil + } + } + case KindIDColon: + return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDOr: + return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDSemicolon: + return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDLabelMarker: + return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDDirectiveMarker: + return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDExpansion: + return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDOrderedSymbolMarker: + return newSymbolToken(tokenKindOrderedSymbolMarker, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDLParen: + return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDRParen: + return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil + default: + return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil + } +} + +type RootNode struct { + Directives []*DirectiveNode + Productions []*ProductionNode + LexProductions []*ProductionNode + Fragments []*FragmentNode +} + +type ProductionNode struct { + Directives []*DirectiveNode + LHS string + RHS []*AlternativeNode + Pos Position +} + +func (n *ProductionNode) isLexical() bool { + if len(n.RHS) == 1 && len(n.RHS[0].Elements) == 1 && n.RHS[0].Elements[0].Pattern != "" { + return true + } + return false +} + +type AlternativeNode struct { + Elements []*ElementNode + Directives []*DirectiveNode + Pos Position +} + +type ElementNode struct { + ID string + Pattern string + Label *LabelNode + Literally bool + Pos Position +} + +type LabelNode struct { + Name string + Pos Position +} + +type DirectiveNode struct { + Name string + Parameters []*ParameterNode + Pos Position +} + +type ParameterNode struct { + ID string + Pattern string + String string + OrderedSymbol string + Group []*DirectiveNode + Expansion bool + Pos Position +} + +type FragmentNode struct { + LHS string + RHS string + Pos Position +} + +func raiseSyntaxError(row int, synErr *SyntaxError) { + panic(&verr.SpecError{ + Cause: synErr, + Row: row, + }) +} + +func raiseSyntaxErrorWithDetail(row int, synErr *SyntaxError, detail string) { + panic(&verr.SpecError{ + Cause: synErr, + Detail: detail, + Row: row, + }) +} + +func Parse(src io.Reader) (*RootNode, error) { + p, err := newParser(src) + if err != nil { + return nil, err + } + + return p.parse() +} + +type parser struct { + lex *lexer + peekedTok *token + lastTok *token + errs verr.SpecErrors + + // A token position that the parser read at last. + // It is used as additional information in error messages. + pos Position +} + +func newParser(src io.Reader) (*parser, error) { + lex, err := newLexer(src) + if err != nil { + return nil, err + } + return &parser{ + lex: lex, + }, nil +} + +func (p *parser) parse() (root *RootNode, retErr error) { + root = p.parseRoot() + if len(p.errs) > 0 { + return nil, p.errs + } + + return root, nil +} + +func (p *parser) parseRoot() *RootNode { + defer func() { + err := recover() + if err != nil { + specErr, ok := err.(*verr.SpecError) + if !ok { + panic(fmt.Errorf("an unexpected error occurred: %v", err)) + } + p.errs = append(p.errs, specErr) + } + }() + + var dirs []*DirectiveNode + var prods []*ProductionNode + var lexProds []*ProductionNode + var fragments []*FragmentNode + for { + dir := p.parseTopLevelDirective() + if dir != nil { + dirs = append(dirs, dir) + continue + } + + fragment := p.parseFragment() + if fragment != nil { + fragments = append(fragments, fragment) + continue + } + + prod := p.parseProduction() + if prod != nil { + if prod.isLexical() { + lexProds = append(lexProds, prod) + } else { + prods = append(prods, prod) + } + continue + } + + if p.consume(tokenKindEOF) { + break + } + } + + return &RootNode{ + Directives: dirs, + Productions: prods, + LexProductions: lexProds, + Fragments: fragments, + } +} + +func (p *parser) parseTopLevelDirective() *DirectiveNode { + defer func() { + err := recover() + if err == nil { + return + } + + specErr, ok := err.(*verr.SpecError) + if !ok { + panic(err) + } + + p.errs = append(p.errs, specErr) + p.skipOverTo(tokenKindSemicolon) + }() + + dir := p.parseDirective() + if dir == nil { + return nil + } + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindSemicolon) { + raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon) + } + + return dir +} + +func (p *parser) parseFragment() *FragmentNode { + defer func() { + err := recover() + if err == nil { + return + } + + specErr, ok := err.(*verr.SpecError) + if !ok { + panic(err) + } + + p.errs = append(p.errs, specErr) + p.skipOverTo(tokenKindSemicolon) + }() + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindKWFragment) { + return nil + } + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindID) { + raiseSyntaxError(p.pos.Row, synErrNoProductionName) + } + lhs := p.lastTok.text + lhsPos := p.lastTok.pos + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindColon) { + raiseSyntaxError(p.pos.Row, synErrNoColon) + } + + var rhs string + switch { + case p.consume(tokenKindTerminalPattern): + rhs = p.lastTok.text + case p.consume(tokenKindStringLiteral): + rhs = spec.EscapePattern(p.lastTok.text) + default: + raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern) + } + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindSemicolon) { + raiseSyntaxError(p.pos.Row, synErrNoSemicolon) + } + + if !p.consume(tokenKindNewline) { + if !p.consume(tokenKindEOF) { + raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline) + } + } + + return &FragmentNode{ + LHS: lhs, + RHS: rhs, + Pos: lhsPos, + } +} + +func (p *parser) parseProduction() *ProductionNode { + defer func() { + err := recover() + if err == nil { + return + } + + specErr, ok := err.(*verr.SpecError) + if !ok { + panic(err) + } + + p.errs = append(p.errs, specErr) + p.skipOverTo(tokenKindSemicolon) + }() + + p.consume(tokenKindNewline) + + if p.consume(tokenKindEOF) { + return nil + } + + if !p.consume(tokenKindID) { + raiseSyntaxError(p.pos.Row, synErrNoProductionName) + } + lhs := p.lastTok.text + lhsPos := p.lastTok.pos + + var dirs []*DirectiveNode + for { + dir := p.parseDirective() + if dir == nil { + break + } + dirs = append(dirs, dir) + } + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindColon) { + raiseSyntaxError(p.pos.Row, synErrNoColon) + } + + alt := p.parseAlternative() + rhs := []*AlternativeNode{alt} + for { + p.consume(tokenKindNewline) + + if !p.consume(tokenKindOr) { + break + } + alt := p.parseAlternative() + rhs = append(rhs, alt) + } + + p.consume(tokenKindNewline) + + if !p.consume(tokenKindSemicolon) { + raiseSyntaxError(p.pos.Row, synErrNoSemicolon) + } + + if !p.consume(tokenKindNewline) { + if !p.consume(tokenKindEOF) { + raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline) + } + } + + prod := &ProductionNode{ + Directives: dirs, + LHS: lhs, + RHS: rhs, + Pos: lhsPos, + } + + // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs. + // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate + // name to the pattern. Therefore, this code prohibits alternatives from containing patterns. + if !prod.isLexical() { + for _, alt := range prod.RHS { + for _, elem := range alt.Elements { + if elem.Pattern != "" { + raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt) + } + } + } + } + + return prod +} + +func (p *parser) parseAlternative() *AlternativeNode { + elems := []*ElementNode{} + for { + elem := p.parseElement() + if elem == nil { + break + } + elems = append(elems, elem) + } + + // When a length of an alternative is zero, we cannot set a position. + var firstElemPos Position + if len(elems) > 0 { + firstElemPos = elems[0].Pos + } + + var dirs []*DirectiveNode + for { + dir := p.parseDirective() + if dir == nil { + break + } + dirs = append(dirs, dir) + } + + return &AlternativeNode{ + Elements: elems, + Directives: dirs, + Pos: firstElemPos, + } +} + +func (p *parser) parseElement() *ElementNode { + var elem *ElementNode + switch { + case p.consume(tokenKindID): + elem = &ElementNode{ + ID: p.lastTok.text, + Pos: p.lastTok.pos, + } + case p.consume(tokenKindTerminalPattern): + elem = &ElementNode{ + Pattern: p.lastTok.text, + Pos: p.lastTok.pos, + } + case p.consume(tokenKindStringLiteral): + elem = &ElementNode{ + Pattern: p.lastTok.text, + Literally: true, + Pos: p.lastTok.pos, + } + default: + if p.consume(tokenKindLabelMarker) { + raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol) + } + return nil + } + if p.consume(tokenKindLabelMarker) { + if !p.consume(tokenKindID) { + raiseSyntaxError(p.pos.Row, synErrNoLabel) + } + elem.Label = &LabelNode{ + Name: p.lastTok.text, + Pos: p.lastTok.pos, + } + } + return elem +} + +func (p *parser) parseDirective() *DirectiveNode { + p.consume(tokenKindNewline) + + if !p.consume(tokenKindDirectiveMarker) { + return nil + } + dirPos := p.lastTok.pos + + if !p.consume(tokenKindID) { + raiseSyntaxError(p.pos.Row, synErrNoDirectiveName) + } + name := p.lastTok.text + + var params []*ParameterNode + for { + param := p.parseParameter() + if param == nil { + break + } + params = append(params, param) + } + + return &DirectiveNode{ + Name: name, + Parameters: params, + Pos: dirPos, + } +} + +func (p *parser) parseParameter() *ParameterNode { + var param *ParameterNode + switch { + case p.consume(tokenKindID): + param = &ParameterNode{ + ID: p.lastTok.text, + Pos: p.lastTok.pos, + } + case p.consume(tokenKindTerminalPattern): + param = &ParameterNode{ + Pattern: p.lastTok.text, + Pos: p.lastTok.pos, + } + case p.consume(tokenKindStringLiteral): + param = &ParameterNode{ + String: p.lastTok.text, + Pos: p.lastTok.pos, + } + case p.consume(tokenKindOrderedSymbolMarker): + if !p.consume(tokenKindID) { + raiseSyntaxError(p.pos.Row, synErrNoOrderedSymbolName) + } + param = &ParameterNode{ + OrderedSymbol: p.lastTok.text, + Pos: p.lastTok.pos, + } + case p.consume(tokenKindLParen): + pos := p.lastTok.pos + var g []*DirectiveNode + for { + dir := p.parseDirective() + if dir == nil { + break + } + g = append(g, dir) + } + if !p.consume(tokenKindRParen) { + raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup) + } + if len(g) == 0 { + // Set an empty slice representing an empty directive group to distinguish between the following two cases. + // + // - #prec (); // vartan allows this case. + // - #prec; // This case will raise an error. + g = []*DirectiveNode{} + } + param = &ParameterNode{ + Group: g, + Pos: pos, + } + } + if p.consume(tokenKindExpantion) { + switch { + case param == nil: + raiseSyntaxError(p.pos.Row, synErrStrayExpOp) + case param.ID == "": + raiseSyntaxError(p.pos.Row, synErrInvalidExpOperand) + } + param.Expansion = true + } + return param +} + +func (p *parser) consume(expected tokenKind) bool { + var tok *token + var err error + if p.peekedTok != nil { + tok = p.peekedTok + p.peekedTok = nil + } else { + tok, err = p.lex.next() + if err != nil { + panic(err) + } + } + p.pos = tok.pos + if tok.kind == tokenKindInvalid { + raiseSyntaxErrorWithDetail(p.pos.Row, synErrInvalidToken, tok.text) + } + if tok.kind == expected { + p.lastTok = tok + return true + } + p.peekedTok = tok + + return false +} + +func (p *parser) skip() { + var tok *token + var err error + for { + if p.peekedTok != nil { + tok = p.peekedTok + p.peekedTok = nil + } else { + tok, err = p.lex.next() + if err != nil { + p.errs = append(p.errs, &verr.SpecError{ + Cause: err, + Row: p.pos.Row, + }) + continue + } + } + + break + } + + p.lastTok = tok + p.pos = tok.pos +} + +func (p *parser) skipOverTo(kind tokenKind) { + for { + if p.consume(kind) || p.consume(tokenKindEOF) { + return + } + p.skip() + } +} + +type SyntaxError struct { + message string +} + +func newSyntaxError(message string) *SyntaxError { + return &SyntaxError{ + message: message, + } +} + +func (e *SyntaxError) Error() string { + return e.message +} + +var ( + // lexical errors + synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore") + synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier") + synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively") + synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the biginning of an identifier") + synErrUnclosedTerminal = newSyntaxError("unclosed terminal") + synErrUnclosedString = newSyntaxError("unclosed string") + synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash") + synErrEmptyPattern = newSyntaxError("a pattern must include at least one character") + synErrEmptyString = newSyntaxError("a string must include at least one character") + + // syntax errors + synErrInvalidToken = newSyntaxError("invalid token") + synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;") + synErrNoProductionName = newSyntaxError("a production name is missing") + synErrNoColon = newSyntaxError("the colon must precede alternatives") + synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") + synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") + synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @") + synErrNoDirectiveName = newSyntaxError("a directive needs a name") + synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing") + synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )") + synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal") + synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier") + synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier") + synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") + synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element") ) +// Code generated by maleeni-go. DO NOT EDIT. type ModeID int diff --git a/src/urubu/spec/grammar/parser/lexer.go b/src/urubu/spec/grammar/parser/lexer.go deleted file mode 100644 index bd8a24f..0000000 --- a/src/urubu/spec/grammar/parser/lexer.go +++ /dev/null @@ -1,297 +0,0 @@ -//go:generate maleeni compile lexspec.json -o clexspec.json -//go:generate maleeni-go clexspec.json --package parser - -package parser - -import ( - _ "embed" - "fmt" - "io" - "regexp" - "strings" - - verr "urubu/error" -) - -type tokenKind string - -const ( - tokenKindKWFragment = tokenKind("fragment") - tokenKindID = tokenKind("id") - tokenKindTerminalPattern = tokenKind("terminal pattern") - tokenKindStringLiteral = tokenKind("string") - tokenKindColon = tokenKind(":") - tokenKindOr = tokenKind("|") - tokenKindSemicolon = tokenKind(";") - tokenKindLabelMarker = tokenKind("@") - tokenKindDirectiveMarker = tokenKind("#") - tokenKindExpantion = tokenKind("...") - tokenKindOrderedSymbolMarker = tokenKind("$") - tokenKindLParen = tokenKind("(") - tokenKindRParen = tokenKind(")") - tokenKindNewline = tokenKind("newline") - tokenKindEOF = tokenKind("eof") - tokenKindInvalid = tokenKind("invalid") -) - -var ( - reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`) - reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`) -) - -type Position struct { - Row int - Col int -} - -func newPosition(row, col int) Position { - return Position{ - Row: row, - Col: col, - } -} - -type token struct { - kind tokenKind - text string - pos Position -} - -func newSymbolToken(kind tokenKind, pos Position) *token { - return &token{ - kind: kind, - pos: pos, - } -} - -func newIDToken(text string, pos Position) *token { - return &token{ - kind: tokenKindID, - text: text, - pos: pos, - } -} - -func newTerminalPatternToken(text string, pos Position) *token { - return &token{ - kind: tokenKindTerminalPattern, - text: text, - pos: pos, - } -} - -func newStringLiteralToken(text string, pos Position) *token { - return &token{ - kind: tokenKindStringLiteral, - text: text, - pos: pos, - } -} - -func newEOFToken() *token { - return &token{ - kind: tokenKindEOF, - } -} - -func newInvalidToken(text string, pos Position) *token { - return &token{ - kind: tokenKindInvalid, - text: text, - pos: pos, - } -} - -type lexer struct { - d *Lexer - buf *token -} - -func newLexer(src io.Reader) (*lexer, error) { - d, err := NewLexer(NewLexSpec(), src) - if err != nil { - return nil, err - } - return &lexer{ - d: d, - }, nil -} - -func (l *lexer) next() (*token, error) { - if l.buf != nil { - tok := l.buf - l.buf = nil - return tok, nil - } - - var newline *token - for { - tok, err := l.lexAndSkipWSs() - if err != nil { - return nil, err - } - if tok.kind == tokenKindNewline { - newline = tok - continue - } - - if newline != nil { - l.buf = tok - return newline, nil - } - return tok, nil - } -} - -func (l *lexer) lexAndSkipWSs() (*token, error) { - var tok *Token - for { - var err error - tok, err = l.d.Next() - if err != nil { - return nil, err - } - if tok.Invalid { - return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil - } - if tok.EOF { - return newEOFToken(), nil - } - switch tok.KindID { - case KindIDWhiteSpace: - continue - case KindIDLineComment: - continue - } - - break - } - - switch tok.KindID { - case KindIDNewline: - return newSymbolToken(tokenKindNewline, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDKwFragment: - return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDIdentifier: - if !reIDChar.Match(tok.Lexeme) { - return nil, &verr.SpecError{ - Cause: synErrIDInvalidChar, - Detail: string(tok.Lexeme), - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") { - return nil, &verr.SpecError{ - Cause: synErrIDInvalidUnderscorePos, - Detail: string(tok.Lexeme), - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - if strings.Contains(string(tok.Lexeme), "__") { - return nil, &verr.SpecError{ - Cause: synErrIDConsecutiveUnderscores, - Detail: string(tok.Lexeme), - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - if reIDInvalidDigitsPos.Match(tok.Lexeme) { - return nil, &verr.SpecError{ - Cause: synErrIDInvalidDigitsPos, - Detail: string(tok.Lexeme), - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - return newIDToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDTerminalOpen: - var b strings.Builder - for { - tok, err := l.d.Next() - if err != nil { - return nil, err - } - if tok.EOF { - return nil, &verr.SpecError{ - Cause: synErrUnclosedTerminal, - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - switch tok.KindID { - case KindIDPattern: - // The escape sequences in a pattern string are interpreted by the lexer, except for the \". - // We must interpret the \" before passing them to the lexer because they are delimiters for - // the pattern strings. - fmt.Fprint(&b, strings.ReplaceAll(string(tok.Lexeme), `\"`, `"`)) - case KindIDEscapeSymbol: - return nil, &verr.SpecError{ - Cause: synErrIncompletedEscSeq, - Row: tok.Row + 1, - Col: tok.Col + 1, - } - case KindIDTerminalClose: - pat := b.String() - if pat == "" { - return nil, &verr.SpecError{ - Cause: synErrEmptyPattern, - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - return newTerminalPatternToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil - } - } - case KindIDStringLiteralOpen: - var b strings.Builder - for { - tok, err := l.d.Next() - if err != nil { - return nil, err - } - if tok.EOF { - return nil, &verr.SpecError{ - Cause: synErrUnclosedString, - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - switch tok.KindID { - case KindIDCharSeq: - fmt.Fprint(&b, string(tok.Lexeme)) - case KindIDStringLiteralClose: - str := b.String() - if str == "" { - return nil, &verr.SpecError{ - Cause: synErrEmptyString, - Row: tok.Row + 1, - Col: tok.Col + 1, - } - } - return newStringLiteralToken(str, newPosition(tok.Row+1, tok.Col+1)), nil - } - } - case KindIDColon: - return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDOr: - return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDSemicolon: - return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDLabelMarker: - return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDDirectiveMarker: - return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDExpansion: - return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDOrderedSymbolMarker: - return newSymbolToken(tokenKindOrderedSymbolMarker, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDLParen: - return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDRParen: - return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil - default: - return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil - } -} diff --git a/src/urubu/spec/grammar/parser/parser.go b/src/urubu/spec/grammar/parser/parser.go deleted file mode 100644 index b604074..0000000 --- a/src/urubu/spec/grammar/parser/parser.go +++ /dev/null @@ -1,582 +0,0 @@ -package parser - -import ( - "fmt" - "io" - - verr "urubu/error" - spec "urubu/spec/grammar" -) - -type RootNode struct { - Directives []*DirectiveNode - Productions []*ProductionNode - LexProductions []*ProductionNode - Fragments []*FragmentNode -} - -type ProductionNode struct { - Directives []*DirectiveNode - LHS string - RHS []*AlternativeNode - Pos Position -} - -func (n *ProductionNode) isLexical() bool { - if len(n.RHS) == 1 && len(n.RHS[0].Elements) == 1 && n.RHS[0].Elements[0].Pattern != "" { - return true - } - return false -} - -type AlternativeNode struct { - Elements []*ElementNode - Directives []*DirectiveNode - Pos Position -} - -type ElementNode struct { - ID string - Pattern string - Label *LabelNode - Literally bool - Pos Position -} - -type LabelNode struct { - Name string - Pos Position -} - -type DirectiveNode struct { - Name string - Parameters []*ParameterNode - Pos Position -} - -type ParameterNode struct { - ID string - Pattern string - String string - OrderedSymbol string - Group []*DirectiveNode - Expansion bool - Pos Position -} - -type FragmentNode struct { - LHS string - RHS string - Pos Position -} - -func raiseSyntaxError(row int, synErr *SyntaxError) { - panic(&verr.SpecError{ - Cause: synErr, - Row: row, - }) -} - -func raiseSyntaxErrorWithDetail(row int, synErr *SyntaxError, detail string) { - panic(&verr.SpecError{ - Cause: synErr, - Detail: detail, - Row: row, - }) -} - -func Parse(src io.Reader) (*RootNode, error) { - p, err := newParser(src) - if err != nil { - return nil, err - } - - return p.parse() -} - -type parser struct { - lex *lexer - peekedTok *token - lastTok *token - errs verr.SpecErrors - - // A token position that the parser read at last. - // It is used as additional information in error messages. - pos Position -} - -func newParser(src io.Reader) (*parser, error) { - lex, err := newLexer(src) - if err != nil { - return nil, err - } - return &parser{ - lex: lex, - }, nil -} - -func (p *parser) parse() (root *RootNode, retErr error) { - root = p.parseRoot() - if len(p.errs) > 0 { - return nil, p.errs - } - - return root, nil -} - -func (p *parser) parseRoot() *RootNode { - defer func() { - err := recover() - if err != nil { - specErr, ok := err.(*verr.SpecError) - if !ok { - panic(fmt.Errorf("an unexpected error occurred: %v", err)) - } - p.errs = append(p.errs, specErr) - } - }() - - var dirs []*DirectiveNode - var prods []*ProductionNode - var lexProds []*ProductionNode - var fragments []*FragmentNode - for { - dir := p.parseTopLevelDirective() - if dir != nil { - dirs = append(dirs, dir) - continue - } - - fragment := p.parseFragment() - if fragment != nil { - fragments = append(fragments, fragment) - continue - } - - prod := p.parseProduction() - if prod != nil { - if prod.isLexical() { - lexProds = append(lexProds, prod) - } else { - prods = append(prods, prod) - } - continue - } - - if p.consume(tokenKindEOF) { - break - } - } - - return &RootNode{ - Directives: dirs, - Productions: prods, - LexProductions: lexProds, - Fragments: fragments, - } -} - -func (p *parser) parseTopLevelDirective() *DirectiveNode { - defer func() { - err := recover() - if err == nil { - return - } - - specErr, ok := err.(*verr.SpecError) - if !ok { - panic(err) - } - - p.errs = append(p.errs, specErr) - p.skipOverTo(tokenKindSemicolon) - }() - - dir := p.parseDirective() - if dir == nil { - return nil - } - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon) - } - - return dir -} - -func (p *parser) parseFragment() *FragmentNode { - defer func() { - err := recover() - if err == nil { - return - } - - specErr, ok := err.(*verr.SpecError) - if !ok { - panic(err) - } - - p.errs = append(p.errs, specErr) - p.skipOverTo(tokenKindSemicolon) - }() - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindKWFragment) { - return nil - } - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoProductionName) - } - lhs := p.lastTok.text - lhsPos := p.lastTok.pos - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindColon) { - raiseSyntaxError(p.pos.Row, synErrNoColon) - } - - var rhs string - switch { - case p.consume(tokenKindTerminalPattern): - rhs = p.lastTok.text - case p.consume(tokenKindStringLiteral): - rhs = spec.EscapePattern(p.lastTok.text) - default: - raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern) - } - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(p.pos.Row, synErrNoSemicolon) - } - - if !p.consume(tokenKindNewline) { - if !p.consume(tokenKindEOF) { - raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline) - } - } - - return &FragmentNode{ - LHS: lhs, - RHS: rhs, - Pos: lhsPos, - } -} - -func (p *parser) parseProduction() *ProductionNode { - defer func() { - err := recover() - if err == nil { - return - } - - specErr, ok := err.(*verr.SpecError) - if !ok { - panic(err) - } - - p.errs = append(p.errs, specErr) - p.skipOverTo(tokenKindSemicolon) - }() - - p.consume(tokenKindNewline) - - if p.consume(tokenKindEOF) { - return nil - } - - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoProductionName) - } - lhs := p.lastTok.text - lhsPos := p.lastTok.pos - - var dirs []*DirectiveNode - for { - dir := p.parseDirective() - if dir == nil { - break - } - dirs = append(dirs, dir) - } - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindColon) { - raiseSyntaxError(p.pos.Row, synErrNoColon) - } - - alt := p.parseAlternative() - rhs := []*AlternativeNode{alt} - for { - p.consume(tokenKindNewline) - - if !p.consume(tokenKindOr) { - break - } - alt := p.parseAlternative() - rhs = append(rhs, alt) - } - - p.consume(tokenKindNewline) - - if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(p.pos.Row, synErrNoSemicolon) - } - - if !p.consume(tokenKindNewline) { - if !p.consume(tokenKindEOF) { - raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline) - } - } - - prod := &ProductionNode{ - Directives: dirs, - LHS: lhs, - RHS: rhs, - Pos: lhsPos, - } - - // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs. - // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate - // name to the pattern. Therefore, this code prohibits alternatives from containing patterns. - if !prod.isLexical() { - for _, alt := range prod.RHS { - for _, elem := range alt.Elements { - if elem.Pattern != "" { - raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt) - } - } - } - } - - return prod -} - -func (p *parser) parseAlternative() *AlternativeNode { - elems := []*ElementNode{} - for { - elem := p.parseElement() - if elem == nil { - break - } - elems = append(elems, elem) - } - - // When a length of an alternative is zero, we cannot set a position. - var firstElemPos Position - if len(elems) > 0 { - firstElemPos = elems[0].Pos - } - - var dirs []*DirectiveNode - for { - dir := p.parseDirective() - if dir == nil { - break - } - dirs = append(dirs, dir) - } - - return &AlternativeNode{ - Elements: elems, - Directives: dirs, - Pos: firstElemPos, - } -} - -func (p *parser) parseElement() *ElementNode { - var elem *ElementNode - switch { - case p.consume(tokenKindID): - elem = &ElementNode{ - ID: p.lastTok.text, - Pos: p.lastTok.pos, - } - case p.consume(tokenKindTerminalPattern): - elem = &ElementNode{ - Pattern: p.lastTok.text, - Pos: p.lastTok.pos, - } - case p.consume(tokenKindStringLiteral): - elem = &ElementNode{ - Pattern: p.lastTok.text, - Literally: true, - Pos: p.lastTok.pos, - } - default: - if p.consume(tokenKindLabelMarker) { - raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol) - } - return nil - } - if p.consume(tokenKindLabelMarker) { - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoLabel) - } - elem.Label = &LabelNode{ - Name: p.lastTok.text, - Pos: p.lastTok.pos, - } - } - return elem -} - -func (p *parser) parseDirective() *DirectiveNode { - p.consume(tokenKindNewline) - - if !p.consume(tokenKindDirectiveMarker) { - return nil - } - dirPos := p.lastTok.pos - - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoDirectiveName) - } - name := p.lastTok.text - - var params []*ParameterNode - for { - param := p.parseParameter() - if param == nil { - break - } - params = append(params, param) - } - - return &DirectiveNode{ - Name: name, - Parameters: params, - Pos: dirPos, - } -} - -func (p *parser) parseParameter() *ParameterNode { - var param *ParameterNode - switch { - case p.consume(tokenKindID): - param = &ParameterNode{ - ID: p.lastTok.text, - Pos: p.lastTok.pos, - } - case p.consume(tokenKindTerminalPattern): - param = &ParameterNode{ - Pattern: p.lastTok.text, - Pos: p.lastTok.pos, - } - case p.consume(tokenKindStringLiteral): - param = &ParameterNode{ - String: p.lastTok.text, - Pos: p.lastTok.pos, - } - case p.consume(tokenKindOrderedSymbolMarker): - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoOrderedSymbolName) - } - param = &ParameterNode{ - OrderedSymbol: p.lastTok.text, - Pos: p.lastTok.pos, - } - case p.consume(tokenKindLParen): - pos := p.lastTok.pos - var g []*DirectiveNode - for { - dir := p.parseDirective() - if dir == nil { - break - } - g = append(g, dir) - } - if !p.consume(tokenKindRParen) { - raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup) - } - if len(g) == 0 { - // Set an empty slice representing an empty directive group to distinguish between the following two cases. - // - // - #prec (); // vartan allows this case. - // - #prec; // This case will raise an error. - g = []*DirectiveNode{} - } - param = &ParameterNode{ - Group: g, - Pos: pos, - } - } - if p.consume(tokenKindExpantion) { - switch { - case param == nil: - raiseSyntaxError(p.pos.Row, synErrStrayExpOp) - case param.ID == "": - raiseSyntaxError(p.pos.Row, synErrInvalidExpOperand) - } - param.Expansion = true - } - return param -} - -func (p *parser) consume(expected tokenKind) bool { - var tok *token - var err error - if p.peekedTok != nil { - tok = p.peekedTok - p.peekedTok = nil - } else { - tok, err = p.lex.next() - if err != nil { - panic(err) - } - } - p.pos = tok.pos - if tok.kind == tokenKindInvalid { - raiseSyntaxErrorWithDetail(p.pos.Row, synErrInvalidToken, tok.text) - } - if tok.kind == expected { - p.lastTok = tok - return true - } - p.peekedTok = tok - - return false -} - -func (p *parser) skip() { - var tok *token - var err error - for { - if p.peekedTok != nil { - tok = p.peekedTok - p.peekedTok = nil - } else { - tok, err = p.lex.next() - if err != nil { - p.errs = append(p.errs, &verr.SpecError{ - Cause: err, - Row: p.pos.Row, - }) - continue - } - } - - break - } - - p.lastTok = tok - p.pos = tok.pos -} - -func (p *parser) skipOverTo(kind tokenKind) { - for { - if p.consume(kind) || p.consume(tokenKindEOF) { - return - } - p.skip() - } -} diff --git a/src/urubu/spec/grammar/parser/syntax_error.go b/src/urubu/spec/grammar/parser/syntax_error.go deleted file mode 100644 index 719fb94..0000000 --- a/src/urubu/spec/grammar/parser/syntax_error.go +++ /dev/null @@ -1,45 +0,0 @@ -package parser - -type SyntaxError struct { - message string -} - -func newSyntaxError(message string) *SyntaxError { - return &SyntaxError{ - message: message, - } -} - -func (e *SyntaxError) Error() string { - return e.message -} - -var ( - // lexical errors - synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore") - synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier") - synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively") - synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the biginning of an identifier") - synErrUnclosedTerminal = newSyntaxError("unclosed terminal") - synErrUnclosedString = newSyntaxError("unclosed string") - synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash") - synErrEmptyPattern = newSyntaxError("a pattern must include at least one character") - synErrEmptyString = newSyntaxError("a string must include at least one character") - - // syntax errors - synErrInvalidToken = newSyntaxError("invalid token") - synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;") - synErrNoProductionName = newSyntaxError("a production name is missing") - synErrNoColon = newSyntaxError("the colon must precede alternatives") - synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") - synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") - synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @") - synErrNoDirectiveName = newSyntaxError("a directive needs a name") - synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing") - synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )") - synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal") - synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier") - synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier") - synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") - synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element") -) diff --git a/src/urubu/spec/grammar/util.go b/src/urubu/spec/grammar/util.go deleted file mode 100644 index bf3f233..0000000 --- a/src/urubu/spec/grammar/util.go +++ /dev/null @@ -1,21 +0,0 @@ -package grammar - -import "strings" - -var rep = strings.NewReplacer( - `.`, `\.`, - `*`, `\*`, - `+`, `\+`, - `?`, `\?`, - `|`, `\|`, - `(`, `\(`, - `)`, `\)`, - `[`, `\[`, - `\`, `\\`, -) - -// EscapePattern escapes the special characters. -// For example, EscapePattern(`+`) returns `\+`. -func EscapePattern(s string) string { - return rep.Replace(s) -} diff --git a/src/urubu/spec/test/tree_lexer.go b/src/urubu/spec/test.go index 8bb1c87..4985e14 100644 --- a/src/urubu/spec/test/tree_lexer.go +++ b/src/urubu/spec/test.go @@ -1,11 +1,342 @@ -// Code generated by vartan-go. DO NOT EDIT. +//go:generate vartan compile tree.vartan -o tree.json +//go:generate vartan-go tree.json --package test + package test import ( + "bufio" + "bytes" + "encoding/json" + "errors" "fmt" "io" + "regexp" + "strconv" + "strings" + "unicode/utf8" ) +type TreeDiff struct { + ExpectedPath string + ActualPath string + Message string +} + +func newTreeDiff(expected, actual *Tree, message string) *TreeDiff { + return &TreeDiff{ + ExpectedPath: expected.path(), + ActualPath: actual.path(), + Message: message, + } +} + +type Tree struct { + Parent *Tree + Offset int + Kind string + Children []*Tree + Lexeme string +} + +func NewNonTerminalTree(kind string, children ...*Tree) *Tree { + return &Tree{ + Kind: kind, + Children: children, + } +} + +func NewTerminalNode(kind string, lexeme string) *Tree { + return &Tree{ + Kind: kind, + Lexeme: lexeme, + } +} + +func (t *Tree) Fill() *Tree { + for i, c := range t.Children { + c.Parent = t + c.Offset = i + c.Fill() + } + return t +} + +func (t *Tree) path() string { + if t.Parent == nil { + return t.Kind + } + return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind) +} + +func (t *Tree) Format() []byte { + var b bytes.Buffer + t.format(&b, 0) + return b.Bytes() +} + +func (t *Tree) format(buf *bytes.Buffer, depth int) { + for i := 0; i < depth; i++ { + buf.WriteString(" ") + } + buf.WriteString("(") + buf.WriteString(t.Kind) + if len(t.Children) > 0 { + buf.WriteString("\n") + for i, c := range t.Children { + c.format(buf, depth+1) + if i < len(t.Children)-1 { + buf.WriteString("\n") + } + } + } + buf.WriteString(")") +} + +func DiffTree(expected, actual *Tree) []*TreeDiff { + if expected == nil && actual == nil { + return nil + } + if actual.Kind != expected.Kind { + msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind) + return []*TreeDiff{ + newTreeDiff(expected, actual, msg), + } + } + if expected.Lexeme != actual.Lexeme { + msg := fmt.Sprintf("unexpected lexeme: expected '%v' but got '%v'", expected.Lexeme, actual.Lexeme) + return []*TreeDiff{ + newTreeDiff(expected, actual, msg), + } + } + if len(actual.Children) != len(expected.Children) { + msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children)) + return []*TreeDiff{ + newTreeDiff(expected, actual, msg), + } + } + var diffs []*TreeDiff + for i, exp := range expected.Children { + if ds := DiffTree(exp, actual.Children[i]); len(ds) > 0 { + diffs = append(diffs, ds...) + } + } + return diffs +} + +type TestCase struct { + Description string + Source []byte + Output *Tree +} + +func ParseTestCase(r io.Reader) (*TestCase, error) { + parts, err := splitIntoParts(r) + if err != nil { + return nil, err + } + if len(parts) != 3 { + return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(parts)) + } + + tp := &treeParser{ + lineOffset: parts[0].lineCount + parts[1].lineCount + 2, + } + tree, err := tp.parseTree(bytes.NewReader(parts[2].buf)) + if err != nil { + return nil, err + } + + return &TestCase{ + Description: string(parts[0].buf), + Source: parts[1].buf, + Output: tree, + }, nil +} + +type testCasePart struct { + buf []byte + lineCount int +} + +func splitIntoParts(r io.Reader) ([]*testCasePart, error) { + var bufs []*testCasePart + s := bufio.NewScanner(r) + for { + buf, lineCount, err := readPart(s) + if err != nil { + return nil, err + } + if buf == nil { + break + } + bufs = append(bufs, &testCasePart{ + buf: buf, + lineCount: lineCount, + }) + } + if err := s.Err(); err != nil { + return nil, err + } + return bufs, nil +} + +var reDelim = regexp.MustCompile(`^\s*---+\s*$`) + +func readPart(s *bufio.Scanner) ([]byte, int, error) { + if !s.Scan() { + return nil, 0, s.Err() + } + buf := &bytes.Buffer{} + line := s.Bytes() + if reDelim.Match(line) { + // Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data. + return []byte{}, 0, nil + } + _, err := buf.Write(line) + if err != nil { + return nil, 0, err + } + lineCount := 1 + for s.Scan() { + line := s.Bytes() + if reDelim.Match(line) { + return buf.Bytes(), lineCount, nil + } + _, err := buf.Write([]byte("\n")) + if err != nil { + return nil, 0, err + } + _, err = buf.Write(line) + if err != nil { + return nil, 0, err + } + lineCount++ + } + if err := s.Err(); err != nil { + return nil, 0, err + } + return buf.Bytes(), lineCount, nil +} + +type treeParser struct { + lineOffset int +} + +func (tp *treeParser) parseTree(src io.Reader) (*Tree, error) { + toks, err := NewTokenStream(src) + if err != nil { + return nil, err + } + gram := NewGrammar() + tb := NewDefaultSyntaxTreeBuilder() + p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb))) + if err != nil { + return nil, err + } + err = p.Parse() + if err != nil { + return nil, err + } + synErrs := p.SyntaxErrors() + if len(synErrs) > 0 { + var b strings.Builder + b.Write(formatSyntaxError(synErrs[0], gram, tp.lineOffset)) + for _, synErr := range synErrs[1:] { + b.WriteRune('\n') + b.Write(formatSyntaxError(synErr, gram, tp.lineOffset)) + } + return nil, errors.New(b.String()) + } + t, err := tp.genTree(tb.Tree()) + if err != nil { + return nil, err + } + return t.Fill(), nil +} + +func formatSyntaxError(synErr *SyntaxError, gram Grammar, lineOffset int) []byte { + var b bytes.Buffer + + b.WriteString(fmt.Sprintf("%v:%v: %v: ", lineOffset+synErr.Row+1, synErr.Col+1, synErr.Message)) + + tok := synErr.Token + switch { + case tok.EOF(): + b.WriteString("<eof>") + case tok.Invalid(): + b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))) + default: + if term := gram.Terminal(tok.TerminalID()); term != "" { + b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term)) + } else { + b.WriteString(fmt.Sprintf("'%v'", string(tok.Lexeme()))) + } + } + b.WriteString(fmt.Sprintf(": expected: %v", synErr.ExpectedTerminals[0])) + for _, t := range synErr.ExpectedTerminals[1:] { + b.WriteString(fmt.Sprintf(", %v", t)) + } + + return b.Bytes() +} + +func (tp *treeParser) genTree(node *Node) (*Tree, error) { + // A node labeled 'error' cannot have children. It always must be (error). + if sym := node.Children[0]; sym.Text == "error" { + if len(node.Children) > 1 { + return nil, fmt.Errorf("%v:%v: error node cannot take children", tp.lineOffset+sym.Row+1, sym.Col+1) + } + return NewTerminalNode(sym.Text, ""), nil + } + + if len(node.Children) == 2 && node.Children[1].KindName == "string" { + var text string + str := node.Children[1].Children[0] + switch str.KindName { + case "raw_string": + text = str.Children[0].Text + case "interpreted_string": + var b strings.Builder + for _, c := range str.Children { + switch c.KindName { + case "escaped_seq": + b.WriteString(strings.TrimPrefix(`\`, c.Text)) + case "escape_char": + return nil, fmt.Errorf("%v:%v: incomplete escape sequence", tp.lineOffset+c.Row+1, c.Col+1) + case "codepoint_expr": + cp := c.Children[0] + n, err := strconv.ParseInt(cp.Text, 16, 64) + if err != nil { + return nil, fmt.Errorf("%v:%v: %v", tp.lineOffset+cp.Row+1, cp.Col+1, err) + } + if !utf8.ValidRune(rune(n)) { + return nil, fmt.Errorf("%v:%v: invalid code point: %v", tp.lineOffset+cp.Row+1, cp.Col+1, cp.Text) + } + b.WriteRune(rune(n)) + default: + b.WriteString(c.Text) + } + } + text = b.String() + } + return NewTerminalNode(node.Children[0].Text, text), nil + } + + var children []*Tree + if len(node.Children) > 1 { + children = make([]*Tree, len(node.Children)-1) + for i, c := range node.Children[1:] { + var err error + children[i], err = tp.genTree(c) + if err != nil { + return nil, err + } + } + } + return NewNonTerminalTree(node.Children[0].Text, children...), nil +} +// Code generated by vartan-go. DO NOT EDIT. + type ModeID int func (id ModeID) Int() int { @@ -1022,3 +1353,1072 @@ func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, strin id := s.kindIDs[mode][modeKind] return id, s.kindNames[id] } +// Code generated by vartan-go. DO NOT EDIT. + +type Grammar interface { + // InitialState returns the initial state of a parser. + InitialState() int + + // StartProduction returns the start production of grammar. + StartProduction() int + + // Action returns an ACTION entry corresponding to a (state, terminal symbol) pair. + Action(state int, terminal int) int + + // GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair. + GoTo(state int, lhs int) int + + // ErrorTrapperState returns true when a state can shift the error symbol. + ErrorTrapperState(state int) bool + + // LHS returns a LHS symbol of a production. + LHS(prod int) int + + // AlternativeSymbolCount returns a symbol count of p production. + AlternativeSymbolCount(prod int) int + + // RecoverProduction returns true when a production has the recover directive. + RecoverProduction(prod int) bool + + // NonTerminal retuns a string representaion of a non-terminal symbol. + NonTerminal(nonTerminal int) string + + // TerminalCount returns a terminal symbol count of grammar. + TerminalCount() int + + // SkipTerminal returns true when a terminal symbol must be skipped on syntax analysis. + SkipTerminal(terminal int) bool + + // EOF returns the EOF symbol. + EOF() int + + // Error returns the error symbol. + Error() int + + // Terminal retuns a string representaion of a terminal symbol. + Terminal(terminal int) string + + // ASTAction returns an AST action entries. + ASTAction(prod int) []int +} + +type VToken interface { + // TerminalID returns a terminal ID. + TerminalID() int + + // Lexeme returns a lexeme. + Lexeme() []byte + + // EOF returns true when a token represents EOF. + EOF() bool + + // Invalid returns true when a token is invalid. + Invalid() bool + + // Position returns (row, column) pair. + Position() (int, int) +} + +type TokenStream interface { + Next() (VToken, error) +} + +type SyntaxError struct { + Row int + Col int + Message string + Token VToken + ExpectedTerminals []string +} + +type ParserOption func(p *Parser) error + +// DisableLAC disables LAC (lookahead correction). LAC is enabled by default. +func DisableLAC() ParserOption { + return func(p *Parser) error { + p.disableLAC = true + return nil + } +} + +func SemanticAction(semAct SemanticActionSet) ParserOption { + return func(p *Parser) error { + p.semAct = semAct + return nil + } +} + +type Parser struct { + toks TokenStream + gram Grammar + stateStack *stateStack + semAct SemanticActionSet + disableLAC bool + onError bool + shiftCount int + synErrs []*SyntaxError +} + +func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) { + p := &Parser{ + toks: toks, + gram: gram, + stateStack: &stateStack{}, + } + + for _, opt := range opts { + err := opt(p) + if err != nil { + return nil, err + } + } + + return p, nil +} + +func (p *Parser) Parse() error { + p.stateStack.push(p.gram.InitialState()) + tok, err := p.nextToken() + if err != nil { + return err + } + +ACTION_LOOP: + for { + act := p.lookupAction(tok) + + switch { + case act < 0: // Shift + nextState := act * -1 + + recovered := false + if p.onError { + p.shiftCount++ + + // When the parser performs shift three times, the parser recovers from the error state. + if p.shiftCount >= 3 { + p.onError = false + p.shiftCount = 0 + recovered = true + } + } + + p.shift(nextState) + + if p.semAct != nil { + p.semAct.Shift(tok, recovered) + } + + tok, err = p.nextToken() + if err != nil { + return err + } + case act > 0: // Reduce + prodNum := act + + recovered := false + if p.onError && p.gram.RecoverProduction(prodNum) { + p.onError = false + p.shiftCount = 0 + recovered = true + } + + accepted := p.reduce(prodNum) + if accepted { + if p.semAct != nil { + p.semAct.Accept() + } + + return nil + } + + if p.semAct != nil { + p.semAct.Reduce(prodNum, recovered) + } + default: // Error + if p.onError { + tok, err = p.nextToken() + if err != nil { + return err + } + if tok.EOF() { + if p.semAct != nil { + p.semAct.MissError(tok) + } + + return nil + } + + continue ACTION_LOOP + } + + row, col := tok.Position() + p.synErrs = append(p.synErrs, &SyntaxError{ + Row: row, + Col: col, + Message: "unexpected token", + Token: tok, + ExpectedTerminals: p.searchLookahead(p.stateStack.top()), + }) + + count, ok := p.trapError() + if !ok { + if p.semAct != nil { + p.semAct.MissError(tok) + } + + return nil + } + + p.onError = true + p.shiftCount = 0 + + act, err := p.lookupActionOnError() + if err != nil { + return err + } + + p.shift(act * -1) + + if p.semAct != nil { + p.semAct.TrapAndShiftError(tok, count) + } + } + } +} + +// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid, +// this method returns `true`. +func (p *Parser) validateLookahead(term int) bool { + p.stateStack.enableExploratoryMode() + defer p.stateStack.disableExploratoryMode() + + for { + act := p.gram.Action(p.stateStack.topExploratorily(), term) + + switch { + case act < 0: // Shift + return true + case act > 0: // Reduce + prodNum := act + + lhs := p.gram.LHS(prodNum) + if lhs == p.gram.LHS(p.gram.StartProduction()) { + return true + } + n := p.gram.AlternativeSymbolCount(prodNum) + p.stateStack.popExploratorily(n) + state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs) + p.stateStack.pushExploratorily(state) + default: // Error + return false + } + } +} + +func (p *Parser) nextToken() (VToken, error) { + for { + // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0, + // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect + // a syntax error because the parser cannot find an entry corresponding to the invalid token. + tok, err := p.toks.Next() + if err != nil { + return nil, err + } + + if p.gram.SkipTerminal(tok.TerminalID()) { + continue + } + + return tok, nil + } +} + +func (p *Parser) tokenToTerminal(tok VToken) int { + if tok.EOF() { + return p.gram.EOF() + } + + return tok.TerminalID() +} + +func (p *Parser) lookupAction(tok VToken) int { + if !p.disableLAC { + term := p.tokenToTerminal(tok) + if !p.validateLookahead(term) { + return 0 + } + } + + return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok)) +} + +func (p *Parser) lookupActionOnError() (int, error) { + act := p.gram.Action(p.stateStack.top(), p.gram.Error()) + if act >= 0 { + return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error())) + } + + return act, nil +} + +func (p *Parser) shift(nextState int) { + p.stateStack.push(nextState) +} + +func (p *Parser) reduce(prodNum int) bool { + lhs := p.gram.LHS(prodNum) + if lhs == p.gram.LHS(p.gram.StartProduction()) { + return true + } + n := p.gram.AlternativeSymbolCount(prodNum) + p.stateStack.pop(n) + nextState := p.gram.GoTo(p.stateStack.top(), lhs) + p.stateStack.push(nextState) + return false +} + +func (p *Parser) trapError() (int, bool) { + count := 0 + for { + if p.gram.ErrorTrapperState(p.stateStack.top()) { + return count, true + } + + if p.stateStack.top() != p.gram.InitialState() { + p.stateStack.pop(1) + count++ + } else { + return 0, false + } + } +} + +func (p *Parser) SyntaxErrors() []*SyntaxError { + return p.synErrs +} + +func (p *Parser) searchLookahead(state int) []string { + kinds := []string{} + termCount := p.gram.TerminalCount() + for term := 0; term < termCount; term++ { + if p.disableLAC { + if p.gram.Action(p.stateStack.top(), term) == 0 { + continue + } + } else { + if !p.validateLookahead(term) { + continue + } + } + + // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol + // intentionally. + if term == p.gram.Error() { + continue + } + + kinds = append(kinds, p.gram.Terminal(term)) + } + + return kinds +} + +type stateStack struct { + items []int + itemsExp []int +} + +func (s *stateStack) enableExploratoryMode() { + s.itemsExp = make([]int, len(s.items)) + copy(s.itemsExp, s.items) +} + +func (s *stateStack) disableExploratoryMode() { + s.itemsExp = nil +} + +func (s *stateStack) top() int { + return s.items[len(s.items)-1] +} + +func (s *stateStack) topExploratorily() int { + return s.itemsExp[len(s.itemsExp)-1] +} + +func (s *stateStack) push(state int) { + s.items = append(s.items, state) +} + +func (s *stateStack) pushExploratorily(state int) { + s.itemsExp = append(s.itemsExp, state) +} + +func (s *stateStack) pop(n int) { + s.items = s.items[:len(s.items)-n] +} + +func (s *stateStack) popExploratorily(n int) { + s.itemsExp = s.itemsExp[:len(s.itemsExp)-n] +} + +type grammarImpl struct { + recoverProductions []int + action []int + goTo []int + alternativeSymbolCounts []int + errorTrapperStates []int + nonTerminals []string + lhsSymbols []int + terminals []string + terminalSkip []int + astActions [][]int +} + +func NewGrammar() *grammarImpl { + return &grammarImpl{ + recoverProductions: []int{ + 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + }, + action: []int{ + 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -3, 0, 0, 0, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, -5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, + -2, 7, 0, -11, 0, 0, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4, + 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -14, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 12, 12, 0, 0, -17, 12, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -22, 0, 16, 16, 0, 0, 0, 0, 0, -23, + -24, -25, -26, -27, -28, -29, 16, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -30, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + -31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -23, -24, -25, -26, -27, -28, -29, 15, + 0, 18, 0, 0, 18, 18, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 0, + 25, 0, 0, 25, 25, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -33, 0, 19, 0, + 0, 19, 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, -34, 0, 0, 0, 0, 0, 0, 20, 0, 0, 20, + 20, 0, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, 20, 0, 21, 0, 0, 21, 21, + 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 0, 22, 0, 0, 22, 22, 0, + 0, 0, 0, 0, 22, 22, 22, 22, 22, 22, 22, 22, 0, 23, 0, 0, 23, 23, 0, 0, + 0, 0, 0, 23, 23, 23, 23, 23, 23, 23, 23, 0, 24, 0, 0, 24, 24, 0, 0, 0, + 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 0, 10, 0, 0, 10, 10, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 13, 13, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 17, 17, 0, 0, 0, 0, 0, 17, + 17, 17, 17, 17, 17, 17, 17, 0, 14, 0, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, -35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -36, + 0, 0, 0, 0, 0, 26, 0, 0, 26, 26, 0, 0, 0, 0, 0, 26, 26, 26, 26, 26, + 26, 26, 26, + }, + goTo: []int{ + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 0, 10, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 21, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 21, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + }, + alternativeSymbolCounts: []int{ + 0, 1, 4, 4, 3, 2, 1, 0, 1, 1, 3, 1, 0, 3, 3, 1, 0, 2, 1, 1, + 1, 1, 1, 1, 1, 1, 4, + }, + errorTrapperStates: []int{ + 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + nonTerminals: []string{ + "", + "tree'", + "tree", + "tree_list", + "string", + "raw_string", + "opt_raw_string_body", + "interpreted_string", + "opt_interpreted_string_body", + "interpreted_string_body", + "interpreted_string_elem", + "codepoint_expr", + }, + lhsSymbols: []int{ + 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, + 10, 10, 10, 10, 10, 10, 11, + }, + terminals: []string{ + "", + "<eof>", + "error", + "ws", + "l_paren", + "r_paren", + "identifier", + "raw_string_open", + "raw_string_body", + "raw_string_close", + "interpreted_string_open", + "interpreted_seq", + "codepoint_prefix", + "l_brace", + "r_brace", + "hex_digits", + "escaped_seq", + "escape_char", + "interpreted_string_close", + }, + terminalSkip: []int{ + 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + }, + astActions: [][]int{ + nil, + nil, + { + 2, -3, + }, + { + 2, 3, + }, + { + 2, + }, + { + -1, 2, + }, + nil, + nil, + nil, + nil, + { + -2, + }, + nil, + nil, + { + -2, + }, + { + 2, + }, + { + -1, + }, + nil, + { + -1, -2, + }, + { + -1, + }, + nil, + nil, + nil, + nil, + nil, + nil, + nil, + { + 3, + }, + }, + } +} + +func (g *grammarImpl) InitialState() int { + return 0 +} + +func (g *grammarImpl) StartProduction() int { + return 1 +} + +func (g *grammarImpl) RecoverProduction(prod int) bool { + return g.recoverProductions[prod] != 0 +} + +func (g *grammarImpl) Action(state int, terminal int) int { + return g.action[state*19+terminal] +} + +func (g *grammarImpl) GoTo(state int, lhs int) int { + return g.goTo[state*12+lhs] +} + +func (g *grammarImpl) AlternativeSymbolCount(prod int) int { + return g.alternativeSymbolCounts[prod] +} + +func (g *grammarImpl) TerminalCount() int { + return 19 +} + +func (g *grammarImpl) SkipTerminal(terminal int) bool { + return g.terminalSkip[terminal] == 1 +} + +func (g *grammarImpl) ErrorTrapperState(state int) bool { + return g.errorTrapperStates[state] != 0 +} + +func (g *grammarImpl) NonTerminal(nonTerminal int) string { + return g.nonTerminals[nonTerminal] +} + +func (g *grammarImpl) LHS(prod int) int { + return g.lhsSymbols[prod] +} + +func (g *grammarImpl) EOF() int { + return 1 +} + +func (g *grammarImpl) Error() int { + return 2 +} + +func (g *grammarImpl) Terminal(terminal int) string { + return g.terminals[terminal] +} + +func (g *grammarImpl) ASTAction(prod int) []int { + return g.astActions[prod] +} + +type vToken struct { + terminalID int + tok *Token +} + +func (t *vToken) TerminalID() int { + return t.terminalID +} + +func (t *vToken) Lexeme() []byte { + return t.tok.Lexeme +} + +func (t *vToken) EOF() bool { + return t.tok.EOF +} + +func (t *vToken) Invalid() bool { + return t.tok.Invalid +} + +func (t *vToken) Position() (int, int) { + return t.tok.Row, t.tok.Col +} + +var kindToTerminal = []int{ + 0, 3, 4, 5, 6, 7, 10, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, +} + +type tokenStream struct { + lex *Lexer + kindToTerminal []int +} + +func NewTokenStream(src io.Reader) (*tokenStream, error) { + lex, err := NewLexer(NewLexSpec(), src) + if err != nil { + return nil, err + } + + return &tokenStream{ + lex: lex, + }, nil +} + +func (t *tokenStream) Next() (VToken, error) { + tok, err := t.lex.Next() + if err != nil { + return nil, err + } + return &vToken{ + terminalID: kindToTerminal[tok.KindID], + tok: tok, + }, nil +} +// Code generated by vartan-go. DO NOT EDIT. + +// SemanticActionSet is a set of semantic actions a parser calls. +type SemanticActionSet interface { + // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol. + // When the parser recovered from an error state by shifting the token, `recovered` is true. + Shift(tok VToken, recovered bool) + + // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production. + // When the parser recovered from an error state by reducing the production, `recovered` is true. + Reduce(prodNum int, recovered bool) + + // Accept runs when the parser accepts an input. + Accept() + + // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack. + // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards + // from the state stack. + // Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token + // corresponding to the error symbol doesn't exist. + TrapAndShiftError(cause VToken, popped int) + + // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error. + MissError(cause VToken) +} + +var _ SemanticActionSet = &SyntaxTreeActionSet{} + +// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface. +type SyntaxTreeNode interface { + // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast` + // directive with `...` operator. + ChildCount() int + + // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast` + // directive with `...` operator. + ExpandChildren() []SyntaxTreeNode +} + +var _ SyntaxTreeNode = &Node{} + +// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types. +// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface. +type SyntaxTreeBuilder interface { + Shift(kindName string, text string, row, col int) SyntaxTreeNode + ShiftError(kindName string) SyntaxTreeNode + Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode + Accept(f SyntaxTreeNode) +} + +var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{} + +// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder. +type DefaulSyntaxTreeBuilder struct { + tree *Node +} + +// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder. +func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder { + return &DefaulSyntaxTreeBuilder{} +} + +// Shift is a implementation of SyntaxTreeBuilder.Shift. +func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode { + return &Node{ + Type: NodeTypeTerminal, + KindName: kindName, + Text: text, + Row: row, + Col: col, + } +} + +// ShiftError is a implementation of SyntaxTreeBuilder.ShiftError. +func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode { + return &Node{ + Type: NodeTypeError, + KindName: kindName, + } +} + +// Reduce is a implementation of SyntaxTreeBuilder.Reduce. +func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode { + cNodes := make([]*Node, len(children)) + for i, c := range children { + cNodes[i] = c.(*Node) + } + return &Node{ + Type: NodeTypeNonTerminal, + KindName: kindName, + Children: cNodes, + } +} + +// Accept is a implementation of SyntaxTreeBuilder.Accept. +func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) { + b.tree = f.(*Node) +} + +// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil. +func (b *DefaulSyntaxTreeBuilder) Tree() *Node { + return b.tree +} + +// SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree. +type SyntaxTreeActionSet struct { + gram Grammar + builder SyntaxTreeBuilder + semStack *semanticStack + disableASTAction bool +} + +// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree). +// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them. +func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet { + return &SyntaxTreeActionSet{ + gram: gram, + builder: builder, + semStack: newSemanticStack(), + } +} + +// NewCSTTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree). +// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them. +func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet { + return &SyntaxTreeActionSet{ + gram: gram, + builder: builder, + semStack: newSemanticStack(), + disableASTAction: true, + } +} + +// Shift is a implementation of SemanticActionSet.Shift method. +func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) { + term := a.tokenToTerminal(tok) + row, col := tok.Position() + a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col)) +} + +// Reduce is a implementation of SemanticActionSet.Reduce method. +func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) { + lhs := a.gram.LHS(prodNum) + + // When an alternative is empty, `n` will be 0, and `handle` will be empty slice. + n := a.gram.AlternativeSymbolCount(prodNum) + handle := a.semStack.pop(n) + + var astAct []int + if !a.disableASTAction { + astAct = a.gram.ASTAction(prodNum) + } + var children []SyntaxTreeNode + if astAct != nil { + // Count the number of children in advance to avoid frequent growth in a slice for children. + { + l := 0 + for _, e := range astAct { + if e > 0 { + l++ + } else { + offset := e*-1 - 1 + l += handle[offset].ChildCount() + } + } + + children = make([]SyntaxTreeNode, l) + } + + p := 0 + for _, e := range astAct { + if e > 0 { + offset := e - 1 + children[p] = handle[offset] + p++ + } else { + offset := e*-1 - 1 + for _, c := range handle[offset].ExpandChildren() { + children[p] = c + p++ + } + } + } + } else { + // If an alternative has no AST action, a driver generates + // a node with the same structure as a CST. + children = handle + } + + a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children)) +} + +// Accept is a implementation of SemanticActionSet.Accept method. +func (a *SyntaxTreeActionSet) Accept() { + top := a.semStack.pop(1) + a.builder.Accept(top[0]) +} + +// TrapAndShiftError is a implementation of SemanticActionSet.TrapAndShiftError method. +func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) { + a.semStack.pop(popped) + a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error()))) +} + +// MissError is a implementation of SemanticActionSet.MissError method. +func (a *SyntaxTreeActionSet) MissError(cause VToken) { +} + +func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int { + if tok.EOF() { + return a.gram.EOF() + } + + return tok.TerminalID() +} + +type semanticStack struct { + frames []SyntaxTreeNode +} + +func newSemanticStack() *semanticStack { + return &semanticStack{ + frames: make([]SyntaxTreeNode, 0, 100), + } +} + +func (s *semanticStack) push(f SyntaxTreeNode) { + s.frames = append(s.frames, f) +} + +func (s *semanticStack) pop(n int) []SyntaxTreeNode { + fs := s.frames[len(s.frames)-n:] + s.frames = s.frames[:len(s.frames)-n] + + return fs +} + +type NodeType int + +const ( + NodeTypeError = 0 + NodeTypeTerminal = 1 + NodeTypeNonTerminal = 2 +) + +// Node is a implementation of SyntaxTreeNode interface. +type Node struct { + Type NodeType + KindName string + Text string + Row int + Col int + Children []*Node +} + +func (n *Node) MarshalJSON() ([]byte, error) { + switch n.Type { + case NodeTypeError: + return json.Marshal(struct { + Type NodeType `json:"type"` + KindName string `json:"kind_name"` + }{ + Type: n.Type, + KindName: n.KindName, + }) + case NodeTypeTerminal: + if n.KindName == "" { + return json.Marshal(struct { + Type NodeType `json:"type"` + Text string `json:"text"` + Row int `json:"row"` + Col int `json:"col"` + }{ + Type: n.Type, + Text: n.Text, + Row: n.Row, + Col: n.Col, + }) + } + return json.Marshal(struct { + Type NodeType `json:"type"` + KindName string `json:"kind_name"` + Text string `json:"text"` + Row int `json:"row"` + Col int `json:"col"` + }{ + Type: n.Type, + KindName: n.KindName, + Text: n.Text, + Row: n.Row, + Col: n.Col, + }) + case NodeTypeNonTerminal: + return json.Marshal(struct { + Type NodeType `json:"type"` + KindName string `json:"kind_name"` + Children []*Node `json:"children"` + }{ + Type: n.Type, + KindName: n.KindName, + Children: n.Children, + }) + default: + return nil, fmt.Errorf("invalid node type: %v", n.Type) + } +} + +// ChildCount is a implementation of SyntaxTreeNode.ChildCount. +func (n *Node) ChildCount() int { + return len(n.Children) +} + +// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren. +func (n *Node) ExpandChildren() []SyntaxTreeNode { + fs := make([]SyntaxTreeNode, len(n.Children)) + for i, n := range n.Children { + fs[i] = n + } + return fs +} + +// PrintTree prints a syntax tree whose root is `node`. +func PrintTree(w io.Writer, node *Node) { + printTree(w, node, "", "") +} + +func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { + if node == nil { + return + } + + switch node.Type { + case NodeTypeError: + fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) + case NodeTypeTerminal: + fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text)) + case NodeTypeNonTerminal: + fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) + + num := len(node.Children) + for i, child := range node.Children { + var line string + if num > 1 && i < num-1 { + line = "├─ " + } else { + line = "└─ " + } + + var prefix string + if i >= num-1 { + prefix = " " + } else { + prefix = "│ " + } + + printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) + } + } +} diff --git a/src/urubu/spec/test/parser.go b/src/urubu/spec/test/parser.go deleted file mode 100644 index b7265d7..0000000 --- a/src/urubu/spec/test/parser.go +++ /dev/null @@ -1,336 +0,0 @@ -//go:generate vartan compile tree.vartan -o tree.json -//go:generate vartan-go tree.json --package test - -package test - -import ( - "bufio" - "bytes" - "errors" - "fmt" - "io" - "regexp" - "strconv" - "strings" - "unicode/utf8" -) - -type TreeDiff struct { - ExpectedPath string - ActualPath string - Message string -} - -func newTreeDiff(expected, actual *Tree, message string) *TreeDiff { - return &TreeDiff{ - ExpectedPath: expected.path(), - ActualPath: actual.path(), - Message: message, - } -} - -type Tree struct { - Parent *Tree - Offset int - Kind string - Children []*Tree - Lexeme string -} - -func NewNonTerminalTree(kind string, children ...*Tree) *Tree { - return &Tree{ - Kind: kind, - Children: children, - } -} - -func NewTerminalNode(kind string, lexeme string) *Tree { - return &Tree{ - Kind: kind, - Lexeme: lexeme, - } -} - -func (t *Tree) Fill() *Tree { - for i, c := range t.Children { - c.Parent = t - c.Offset = i - c.Fill() - } - return t -} - -func (t *Tree) path() string { - if t.Parent == nil { - return t.Kind - } - return fmt.Sprintf("%v.[%v]%v", t.Parent.path(), t.Offset, t.Kind) -} - -func (t *Tree) Format() []byte { - var b bytes.Buffer - t.format(&b, 0) - return b.Bytes() -} - -func (t *Tree) format(buf *bytes.Buffer, depth int) { - for i := 0; i < depth; i++ { - buf.WriteString(" ") - } - buf.WriteString("(") - buf.WriteString(t.Kind) - if len(t.Children) > 0 { - buf.WriteString("\n") - for i, c := range t.Children { - c.format(buf, depth+1) - if i < len(t.Children)-1 { - buf.WriteString("\n") - } - } - } - buf.WriteString(")") -} - -func DiffTree(expected, actual *Tree) []*TreeDiff { - if expected == nil && actual == nil { - return nil - } - if actual.Kind != expected.Kind { - msg := fmt.Sprintf("unexpected kind: expected '%v' but got '%v'", expected.Kind, actual.Kind) - return []*TreeDiff{ - newTreeDiff(expected, actual, msg), - } - } - if expected.Lexeme != actual.Lexeme { - msg := fmt.Sprintf("unexpected lexeme: expected '%v' but got '%v'", expected.Lexeme, actual.Lexeme) - return []*TreeDiff{ - newTreeDiff(expected, actual, msg), - } - } - if len(actual.Children) != len(expected.Children) { - msg := fmt.Sprintf("unexpected node count: expected %v but got %v", len(expected.Children), len(actual.Children)) - return []*TreeDiff{ - newTreeDiff(expected, actual, msg), - } - } - var diffs []*TreeDiff - for i, exp := range expected.Children { - if ds := DiffTree(exp, actual.Children[i]); len(ds) > 0 { - diffs = append(diffs, ds...) - } - } - return diffs -} - -type TestCase struct { - Description string - Source []byte - Output *Tree -} - -func ParseTestCase(r io.Reader) (*TestCase, error) { - parts, err := splitIntoParts(r) - if err != nil { - return nil, err - } - if len(parts) != 3 { - return nil, fmt.Errorf("too many or too few part delimiters: a test case consists of just tree parts: %v parts found", len(parts)) - } - - tp := &treeParser{ - lineOffset: parts[0].lineCount + parts[1].lineCount + 2, - } - tree, err := tp.parseTree(bytes.NewReader(parts[2].buf)) - if err != nil { - return nil, err - } - - return &TestCase{ - Description: string(parts[0].buf), - Source: parts[1].buf, - Output: tree, - }, nil -} - -type testCasePart struct { - buf []byte - lineCount int -} - -func splitIntoParts(r io.Reader) ([]*testCasePart, error) { - var bufs []*testCasePart - s := bufio.NewScanner(r) - for { - buf, lineCount, err := readPart(s) - if err != nil { - return nil, err - } - if buf == nil { - break - } - bufs = append(bufs, &testCasePart{ - buf: buf, - lineCount: lineCount, - }) - } - if err := s.Err(); err != nil { - return nil, err - } - return bufs, nil -} - -var reDelim = regexp.MustCompile(`^\s*---+\s*$`) - -func readPart(s *bufio.Scanner) ([]byte, int, error) { - if !s.Scan() { - return nil, 0, s.Err() - } - buf := &bytes.Buffer{} - line := s.Bytes() - if reDelim.Match(line) { - // Return an empty slice because (*bytes.Buffer).Bytes() returns nil if we have never written data. - return []byte{}, 0, nil - } - _, err := buf.Write(line) - if err != nil { - return nil, 0, err - } - lineCount := 1 - for s.Scan() { - line := s.Bytes() - if reDelim.Match(line) { - return buf.Bytes(), lineCount, nil - } - _, err := buf.Write([]byte("\n")) - if err != nil { - return nil, 0, err - } - _, err = buf.Write(line) - if err != nil { - return nil, 0, err - } - lineCount++ - } - if err := s.Err(); err != nil { - return nil, 0, err - } - return buf.Bytes(), lineCount, nil -} - -type treeParser struct { - lineOffset int -} - -func (tp *treeParser) parseTree(src io.Reader) (*Tree, error) { - toks, err := NewTokenStream(src) - if err != nil { - return nil, err - } - gram := NewGrammar() - tb := NewDefaultSyntaxTreeBuilder() - p, err := NewParser(toks, gram, SemanticAction(NewASTActionSet(gram, tb))) - if err != nil { - return nil, err - } - err = p.Parse() - if err != nil { - return nil, err - } - synErrs := p.SyntaxErrors() - if len(synErrs) > 0 { - var b strings.Builder - b.Write(formatSyntaxError(synErrs[0], gram, tp.lineOffset)) - for _, synErr := range synErrs[1:] { - b.WriteRune('\n') - b.Write(formatSyntaxError(synErr, gram, tp.lineOffset)) - } - return nil, errors.New(b.String()) - } - t, err := tp.genTree(tb.Tree()) - if err != nil { - return nil, err - } - return t.Fill(), nil -} - -func formatSyntaxError(synErr *SyntaxError, gram Grammar, lineOffset int) []byte { - var b bytes.Buffer - - b.WriteString(fmt.Sprintf("%v:%v: %v: ", lineOffset+synErr.Row+1, synErr.Col+1, synErr.Message)) - - tok := synErr.Token - switch { - case tok.EOF(): - b.WriteString("<eof>") - case tok.Invalid(): - b.WriteString(fmt.Sprintf("'%v' (<invalid>)", string(tok.Lexeme()))) - default: - if term := gram.Terminal(tok.TerminalID()); term != "" { - b.WriteString(fmt.Sprintf("'%v' (%v)", string(tok.Lexeme()), term)) - } else { - b.WriteString(fmt.Sprintf("'%v'", string(tok.Lexeme()))) - } - } - b.WriteString(fmt.Sprintf(": expected: %v", synErr.ExpectedTerminals[0])) - for _, t := range synErr.ExpectedTerminals[1:] { - b.WriteString(fmt.Sprintf(", %v", t)) - } - - return b.Bytes() -} - -func (tp *treeParser) genTree(node *Node) (*Tree, error) { - // A node labeled 'error' cannot have children. It always must be (error). - if sym := node.Children[0]; sym.Text == "error" { - if len(node.Children) > 1 { - return nil, fmt.Errorf("%v:%v: error node cannot take children", tp.lineOffset+sym.Row+1, sym.Col+1) - } - return NewTerminalNode(sym.Text, ""), nil - } - - if len(node.Children) == 2 && node.Children[1].KindName == "string" { - var text string - str := node.Children[1].Children[0] - switch str.KindName { - case "raw_string": - text = str.Children[0].Text - case "interpreted_string": - var b strings.Builder - for _, c := range str.Children { - switch c.KindName { - case "escaped_seq": - b.WriteString(strings.TrimPrefix(`\`, c.Text)) - case "escape_char": - return nil, fmt.Errorf("%v:%v: incomplete escape sequence", tp.lineOffset+c.Row+1, c.Col+1) - case "codepoint_expr": - cp := c.Children[0] - n, err := strconv.ParseInt(cp.Text, 16, 64) - if err != nil { - return nil, fmt.Errorf("%v:%v: %v", tp.lineOffset+cp.Row+1, cp.Col+1, err) - } - if !utf8.ValidRune(rune(n)) { - return nil, fmt.Errorf("%v:%v: invalid code point: %v", tp.lineOffset+cp.Row+1, cp.Col+1, cp.Text) - } - b.WriteRune(rune(n)) - default: - b.WriteString(c.Text) - } - } - text = b.String() - } - return NewTerminalNode(node.Children[0].Text, text), nil - } - - var children []*Tree - if len(node.Children) > 1 { - children = make([]*Tree, len(node.Children)-1) - for i, c := range node.Children[1:] { - var err error - children[i], err = tp.genTree(c) - if err != nil { - return nil, err - } - } - } - return NewNonTerminalTree(node.Children[0].Text, children...), nil -} diff --git a/src/urubu/spec/test/tree_parser.go b/src/urubu/spec/test/tree_parser.go deleted file mode 100644 index 528d259..0000000 --- a/src/urubu/spec/test/tree_parser.go +++ /dev/null @@ -1,716 +0,0 @@ -// Code generated by vartan-go. DO NOT EDIT. -package test - -import ( - "fmt" - "io" -) - -type Grammar interface { - // InitialState returns the initial state of a parser. - InitialState() int - - // StartProduction returns the start production of grammar. - StartProduction() int - - // Action returns an ACTION entry corresponding to a (state, terminal symbol) pair. - Action(state int, terminal int) int - - // GoTo returns a GOTO entry corresponding to a (state, non-terminal symbol) pair. - GoTo(state int, lhs int) int - - // ErrorTrapperState returns true when a state can shift the error symbol. - ErrorTrapperState(state int) bool - - // LHS returns a LHS symbol of a production. - LHS(prod int) int - - // AlternativeSymbolCount returns a symbol count of p production. - AlternativeSymbolCount(prod int) int - - // RecoverProduction returns true when a production has the recover directive. - RecoverProduction(prod int) bool - - // NonTerminal retuns a string representaion of a non-terminal symbol. - NonTerminal(nonTerminal int) string - - // TerminalCount returns a terminal symbol count of grammar. - TerminalCount() int - - // SkipTerminal returns true when a terminal symbol must be skipped on syntax analysis. - SkipTerminal(terminal int) bool - - // EOF returns the EOF symbol. - EOF() int - - // Error returns the error symbol. - Error() int - - // Terminal retuns a string representaion of a terminal symbol. - Terminal(terminal int) string - - // ASTAction returns an AST action entries. - ASTAction(prod int) []int -} - -type VToken interface { - // TerminalID returns a terminal ID. - TerminalID() int - - // Lexeme returns a lexeme. - Lexeme() []byte - - // EOF returns true when a token represents EOF. - EOF() bool - - // Invalid returns true when a token is invalid. - Invalid() bool - - // Position returns (row, column) pair. - Position() (int, int) -} - -type TokenStream interface { - Next() (VToken, error) -} - -type SyntaxError struct { - Row int - Col int - Message string - Token VToken - ExpectedTerminals []string -} - -type ParserOption func(p *Parser) error - -// DisableLAC disables LAC (lookahead correction). LAC is enabled by default. -func DisableLAC() ParserOption { - return func(p *Parser) error { - p.disableLAC = true - return nil - } -} - -func SemanticAction(semAct SemanticActionSet) ParserOption { - return func(p *Parser) error { - p.semAct = semAct - return nil - } -} - -type Parser struct { - toks TokenStream - gram Grammar - stateStack *stateStack - semAct SemanticActionSet - disableLAC bool - onError bool - shiftCount int - synErrs []*SyntaxError -} - -func NewParser(toks TokenStream, gram Grammar, opts ...ParserOption) (*Parser, error) { - p := &Parser{ - toks: toks, - gram: gram, - stateStack: &stateStack{}, - } - - for _, opt := range opts { - err := opt(p) - if err != nil { - return nil, err - } - } - - return p, nil -} - -func (p *Parser) Parse() error { - p.stateStack.push(p.gram.InitialState()) - tok, err := p.nextToken() - if err != nil { - return err - } - -ACTION_LOOP: - for { - act := p.lookupAction(tok) - - switch { - case act < 0: // Shift - nextState := act * -1 - - recovered := false - if p.onError { - p.shiftCount++ - - // When the parser performs shift three times, the parser recovers from the error state. - if p.shiftCount >= 3 { - p.onError = false - p.shiftCount = 0 - recovered = true - } - } - - p.shift(nextState) - - if p.semAct != nil { - p.semAct.Shift(tok, recovered) - } - - tok, err = p.nextToken() - if err != nil { - return err - } - case act > 0: // Reduce - prodNum := act - - recovered := false - if p.onError && p.gram.RecoverProduction(prodNum) { - p.onError = false - p.shiftCount = 0 - recovered = true - } - - accepted := p.reduce(prodNum) - if accepted { - if p.semAct != nil { - p.semAct.Accept() - } - - return nil - } - - if p.semAct != nil { - p.semAct.Reduce(prodNum, recovered) - } - default: // Error - if p.onError { - tok, err = p.nextToken() - if err != nil { - return err - } - if tok.EOF() { - if p.semAct != nil { - p.semAct.MissError(tok) - } - - return nil - } - - continue ACTION_LOOP - } - - row, col := tok.Position() - p.synErrs = append(p.synErrs, &SyntaxError{ - Row: row, - Col: col, - Message: "unexpected token", - Token: tok, - ExpectedTerminals: p.searchLookahead(p.stateStack.top()), - }) - - count, ok := p.trapError() - if !ok { - if p.semAct != nil { - p.semAct.MissError(tok) - } - - return nil - } - - p.onError = true - p.shiftCount = 0 - - act, err := p.lookupActionOnError() - if err != nil { - return err - } - - p.shift(act * -1) - - if p.semAct != nil { - p.semAct.TrapAndShiftError(tok, count) - } - } - } -} - -// validateLookahead validates whether `term` is a valid lookahead in the current context. When `term` is valid, -// this method returns `true`. -func (p *Parser) validateLookahead(term int) bool { - p.stateStack.enableExploratoryMode() - defer p.stateStack.disableExploratoryMode() - - for { - act := p.gram.Action(p.stateStack.topExploratorily(), term) - - switch { - case act < 0: // Shift - return true - case act > 0: // Reduce - prodNum := act - - lhs := p.gram.LHS(prodNum) - if lhs == p.gram.LHS(p.gram.StartProduction()) { - return true - } - n := p.gram.AlternativeSymbolCount(prodNum) - p.stateStack.popExploratorily(n) - state := p.gram.GoTo(p.stateStack.topExploratorily(), lhs) - p.stateStack.pushExploratorily(state) - default: // Error - return false - } - } -} - -func (p *Parser) nextToken() (VToken, error) { - for { - // We don't have to check whether the token is invalid because the kind ID of the invalid token is 0, - // and the parsing table doesn't have an entry corresponding to the kind ID 0. Thus we can detect - // a syntax error because the parser cannot find an entry corresponding to the invalid token. - tok, err := p.toks.Next() - if err != nil { - return nil, err - } - - if p.gram.SkipTerminal(tok.TerminalID()) { - continue - } - - return tok, nil - } -} - -func (p *Parser) tokenToTerminal(tok VToken) int { - if tok.EOF() { - return p.gram.EOF() - } - - return tok.TerminalID() -} - -func (p *Parser) lookupAction(tok VToken) int { - if !p.disableLAC { - term := p.tokenToTerminal(tok) - if !p.validateLookahead(term) { - return 0 - } - } - - return p.gram.Action(p.stateStack.top(), p.tokenToTerminal(tok)) -} - -func (p *Parser) lookupActionOnError() (int, error) { - act := p.gram.Action(p.stateStack.top(), p.gram.Error()) - if act >= 0 { - return 0, fmt.Errorf("an entry must be a shift action by the error symbol; entry: %v, state: %v, symbol: %v", act, p.stateStack.top(), p.gram.Terminal(p.gram.Error())) - } - - return act, nil -} - -func (p *Parser) shift(nextState int) { - p.stateStack.push(nextState) -} - -func (p *Parser) reduce(prodNum int) bool { - lhs := p.gram.LHS(prodNum) - if lhs == p.gram.LHS(p.gram.StartProduction()) { - return true - } - n := p.gram.AlternativeSymbolCount(prodNum) - p.stateStack.pop(n) - nextState := p.gram.GoTo(p.stateStack.top(), lhs) - p.stateStack.push(nextState) - return false -} - -func (p *Parser) trapError() (int, bool) { - count := 0 - for { - if p.gram.ErrorTrapperState(p.stateStack.top()) { - return count, true - } - - if p.stateStack.top() != p.gram.InitialState() { - p.stateStack.pop(1) - count++ - } else { - return 0, false - } - } -} - -func (p *Parser) SyntaxErrors() []*SyntaxError { - return p.synErrs -} - -func (p *Parser) searchLookahead(state int) []string { - kinds := []string{} - termCount := p.gram.TerminalCount() - for term := 0; term < termCount; term++ { - if p.disableLAC { - if p.gram.Action(p.stateStack.top(), term) == 0 { - continue - } - } else { - if !p.validateLookahead(term) { - continue - } - } - - // We don't add the error symbol to the look-ahead symbols because users cannot input the error symbol - // intentionally. - if term == p.gram.Error() { - continue - } - - kinds = append(kinds, p.gram.Terminal(term)) - } - - return kinds -} - -type stateStack struct { - items []int - itemsExp []int -} - -func (s *stateStack) enableExploratoryMode() { - s.itemsExp = make([]int, len(s.items)) - copy(s.itemsExp, s.items) -} - -func (s *stateStack) disableExploratoryMode() { - s.itemsExp = nil -} - -func (s *stateStack) top() int { - return s.items[len(s.items)-1] -} - -func (s *stateStack) topExploratorily() int { - return s.itemsExp[len(s.itemsExp)-1] -} - -func (s *stateStack) push(state int) { - s.items = append(s.items, state) -} - -func (s *stateStack) pushExploratorily(state int) { - s.itemsExp = append(s.itemsExp, state) -} - -func (s *stateStack) pop(n int) { - s.items = s.items[:len(s.items)-n] -} - -func (s *stateStack) popExploratorily(n int) { - s.itemsExp = s.itemsExp[:len(s.itemsExp)-n] -} - -type grammarImpl struct { - recoverProductions []int - action []int - goTo []int - alternativeSymbolCounts []int - errorTrapperStates []int - nonTerminals []string - lhsSymbols []int - terminals []string - terminalSkip []int - astActions [][]int -} - -func NewGrammar() *grammarImpl { - return &grammarImpl{ - recoverProductions: []int{ - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, - }, - action: []int{ - 0, 0, 0, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -3, 0, 0, 0, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, -5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 0, - -2, 7, 0, -11, 0, 0, -12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 4, - 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -2, -14, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 12, 12, 0, 0, -17, 12, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -22, 0, 16, 16, 0, 0, 0, 0, 0, -23, - -24, -25, -26, -27, -28, -29, 16, 0, 0, 0, 0, 5, 5, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 3, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -30, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -23, -24, -25, -26, -27, -28, -29, 15, - 0, 18, 0, 0, 18, 18, 0, 0, 0, 0, 0, 18, 18, 18, 18, 18, 18, 18, 18, 0, - 25, 0, 0, 25, 25, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -33, 0, 19, 0, - 0, 19, 19, 0, 0, 0, 0, 0, 19, 19, 19, 19, 19, 19, 19, 19, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, -34, 0, 0, 0, 0, 0, 0, 20, 0, 0, 20, - 20, 0, 0, 0, 0, 0, 20, 20, 20, 20, 20, 20, 20, 20, 0, 21, 0, 0, 21, 21, - 0, 0, 0, 0, 0, 21, 21, 21, 21, 21, 21, 21, 21, 0, 22, 0, 0, 22, 22, 0, - 0, 0, 0, 0, 22, 22, 22, 22, 22, 22, 22, 22, 0, 23, 0, 0, 23, 23, 0, 0, - 0, 0, 0, 23, 23, 23, 23, 23, 23, 23, 23, 0, 24, 0, 0, 24, 24, 0, 0, 0, - 0, 0, 24, 24, 24, 24, 24, 24, 24, 24, 0, 10, 0, 0, 10, 10, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 13, 13, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 17, 0, 0, 17, 17, 0, 0, 0, 0, 0, 17, - 17, 17, 17, 17, 17, 17, 17, 0, 14, 0, 0, 14, 14, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, -35, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -36, - 0, 0, 0, 0, 0, 26, 0, 0, 26, 26, 0, 0, 0, 0, 0, 26, 26, 26, 26, 26, - 26, 26, 26, - }, - goTo: []int{ - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 7, 8, 9, 0, 10, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 18, 19, 20, 21, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 21, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, - }, - alternativeSymbolCounts: []int{ - 0, 1, 4, 4, 3, 2, 1, 0, 1, 1, 3, 1, 0, 3, 3, 1, 0, 2, 1, 1, - 1, 1, 1, 1, 1, 1, 4, - }, - errorTrapperStates: []int{ - 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - nonTerminals: []string{ - "", - "tree'", - "tree", - "tree_list", - "string", - "raw_string", - "opt_raw_string_body", - "interpreted_string", - "opt_interpreted_string_body", - "interpreted_string_body", - "interpreted_string_elem", - "codepoint_expr", - }, - lhsSymbols: []int{ - 0, 1, 2, 2, 2, 3, 3, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, - 10, 10, 10, 10, 10, 10, 11, - }, - terminals: []string{ - "", - "<eof>", - "error", - "ws", - "l_paren", - "r_paren", - "identifier", - "raw_string_open", - "raw_string_body", - "raw_string_close", - "interpreted_string_open", - "interpreted_seq", - "codepoint_prefix", - "l_brace", - "r_brace", - "hex_digits", - "escaped_seq", - "escape_char", - "interpreted_string_close", - }, - terminalSkip: []int{ - 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, - astActions: [][]int{ - nil, - nil, - { - 2, -3, - }, - { - 2, 3, - }, - { - 2, - }, - { - -1, 2, - }, - nil, - nil, - nil, - nil, - { - -2, - }, - nil, - nil, - { - -2, - }, - { - 2, - }, - { - -1, - }, - nil, - { - -1, -2, - }, - { - -1, - }, - nil, - nil, - nil, - nil, - nil, - nil, - nil, - { - 3, - }, - }, - } -} - -func (g *grammarImpl) InitialState() int { - return 0 -} - -func (g *grammarImpl) StartProduction() int { - return 1 -} - -func (g *grammarImpl) RecoverProduction(prod int) bool { - return g.recoverProductions[prod] != 0 -} - -func (g *grammarImpl) Action(state int, terminal int) int { - return g.action[state*19+terminal] -} - -func (g *grammarImpl) GoTo(state int, lhs int) int { - return g.goTo[state*12+lhs] -} - -func (g *grammarImpl) AlternativeSymbolCount(prod int) int { - return g.alternativeSymbolCounts[prod] -} - -func (g *grammarImpl) TerminalCount() int { - return 19 -} - -func (g *grammarImpl) SkipTerminal(terminal int) bool { - return g.terminalSkip[terminal] == 1 -} - -func (g *grammarImpl) ErrorTrapperState(state int) bool { - return g.errorTrapperStates[state] != 0 -} - -func (g *grammarImpl) NonTerminal(nonTerminal int) string { - return g.nonTerminals[nonTerminal] -} - -func (g *grammarImpl) LHS(prod int) int { - return g.lhsSymbols[prod] -} - -func (g *grammarImpl) EOF() int { - return 1 -} - -func (g *grammarImpl) Error() int { - return 2 -} - -func (g *grammarImpl) Terminal(terminal int) string { - return g.terminals[terminal] -} - -func (g *grammarImpl) ASTAction(prod int) []int { - return g.astActions[prod] -} - -type vToken struct { - terminalID int - tok *Token -} - -func (t *vToken) TerminalID() int { - return t.terminalID -} - -func (t *vToken) Lexeme() []byte { - return t.tok.Lexeme -} - -func (t *vToken) EOF() bool { - return t.tok.EOF -} - -func (t *vToken) Invalid() bool { - return t.tok.Invalid -} - -func (t *vToken) Position() (int, int) { - return t.tok.Row, t.tok.Col -} - -var kindToTerminal = []int{ - 0, 3, 4, 5, 6, 7, 10, 8, 9, 11, 12, 13, 14, 15, 16, 17, 18, -} - -type tokenStream struct { - lex *Lexer - kindToTerminal []int -} - -func NewTokenStream(src io.Reader) (*tokenStream, error) { - lex, err := NewLexer(NewLexSpec(), src) - if err != nil { - return nil, err - } - - return &tokenStream{ - lex: lex, - }, nil -} - -func (t *tokenStream) Next() (VToken, error) { - tok, err := t.lex.Next() - if err != nil { - return nil, err - } - return &vToken{ - terminalID: kindToTerminal[tok.KindID], - tok: tok, - }, nil -} diff --git a/src/urubu/spec/test/tree_semantic_action.go b/src/urubu/spec/test/tree_semantic_action.go deleted file mode 100644 index c1d5a25..0000000 --- a/src/urubu/spec/test/tree_semantic_action.go +++ /dev/null @@ -1,367 +0,0 @@ -// Code generated by vartan-go. DO NOT EDIT. -package test - -import ( - "encoding/json" - "fmt" - "io" - "strconv" -) - -// SemanticActionSet is a set of semantic actions a parser calls. -type SemanticActionSet interface { - // Shift runs when the parser shifts a symbol onto a state stack. `tok` is a token corresponding to the symbol. - // When the parser recovered from an error state by shifting the token, `recovered` is true. - Shift(tok VToken, recovered bool) - - // Reduce runs when the parser reduces an RHS of a production to its LHS. `prodNum` is a number of the production. - // When the parser recovered from an error state by reducing the production, `recovered` is true. - Reduce(prodNum int, recovered bool) - - // Accept runs when the parser accepts an input. - Accept() - - // TrapAndShiftError runs when the parser traps a syntax error and shifts a error symbol onto the state stack. - // `cause` is a token that caused a syntax error. `popped` is the number of frames that the parser discards - // from the state stack. - // Unlike `Shift` function, this function doesn't take a token to be shifted as an argument because a token - // corresponding to the error symbol doesn't exist. - TrapAndShiftError(cause VToken, popped int) - - // MissError runs when the parser fails to trap a syntax error. `cause` is a token that caused a syntax error. - MissError(cause VToken) -} - -var _ SemanticActionSet = &SyntaxTreeActionSet{} - -// SyntaxTreeNode is a node of a syntax tree. A node type used in SyntaxTreeActionSet must implement SyntaxTreeNode interface. -type SyntaxTreeNode interface { - // ChildCount returns a child count of a node. A parser calls this method to know the child count to be expanded by an `#ast` - // directive with `...` operator. - ChildCount() int - - // ExpandChildren returns children of a node. A parser calls this method to fetch the children to be expanded by an `#ast` - // directive with `...` operator. - ExpandChildren() []SyntaxTreeNode -} - -var _ SyntaxTreeNode = &Node{} - -// SyntaxTreeBuilder allows you to construct a syntax tree containing arbitrary user-defined node types. -// The parser uses SyntaxTreeBuilder interface as a part of semantic actions via SyntaxTreeActionSet interface. -type SyntaxTreeBuilder interface { - Shift(kindName string, text string, row, col int) SyntaxTreeNode - ShiftError(kindName string) SyntaxTreeNode - Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode - Accept(f SyntaxTreeNode) -} - -var _ SyntaxTreeBuilder = &DefaulSyntaxTreeBuilder{} - -// DefaulSyntaxTreeBuilder is a implementation of SyntaxTreeBuilder. -type DefaulSyntaxTreeBuilder struct { - tree *Node -} - -// NewDefaultSyntaxTreeBuilder returns a new DefaultSyntaxTreeBuilder. -func NewDefaultSyntaxTreeBuilder() *DefaulSyntaxTreeBuilder { - return &DefaulSyntaxTreeBuilder{} -} - -// Shift is a implementation of SyntaxTreeBuilder.Shift. -func (b *DefaulSyntaxTreeBuilder) Shift(kindName string, text string, row, col int) SyntaxTreeNode { - return &Node{ - Type: NodeTypeTerminal, - KindName: kindName, - Text: text, - Row: row, - Col: col, - } -} - -// ShiftError is a implementation of SyntaxTreeBuilder.ShiftError. -func (b *DefaulSyntaxTreeBuilder) ShiftError(kindName string) SyntaxTreeNode { - return &Node{ - Type: NodeTypeError, - KindName: kindName, - } -} - -// Reduce is a implementation of SyntaxTreeBuilder.Reduce. -func (b *DefaulSyntaxTreeBuilder) Reduce(kindName string, children []SyntaxTreeNode) SyntaxTreeNode { - cNodes := make([]*Node, len(children)) - for i, c := range children { - cNodes[i] = c.(*Node) - } - return &Node{ - Type: NodeTypeNonTerminal, - KindName: kindName, - Children: cNodes, - } -} - -// Accept is a implementation of SyntaxTreeBuilder.Accept. -func (b *DefaulSyntaxTreeBuilder) Accept(f SyntaxTreeNode) { - b.tree = f.(*Node) -} - -// Tree returns a syntax tree when the parser has accepted an input. If a syntax error occurs, the return value is nil. -func (b *DefaulSyntaxTreeBuilder) Tree() *Node { - return b.tree -} - -// SyntaxTreeActionSet is a implementation of SemanticActionSet interface and constructs a syntax tree. -type SyntaxTreeActionSet struct { - gram Grammar - builder SyntaxTreeBuilder - semStack *semanticStack - disableASTAction bool -} - -// NewASTActionSet returns a new SyntaxTreeActionSet that constructs an AST (Abstract Syntax Tree). -// When grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns interprets them. -func NewASTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet { - return &SyntaxTreeActionSet{ - gram: gram, - builder: builder, - semStack: newSemanticStack(), - } -} - -// NewCSTTActionSet returns a new SyntaxTreeActionSet that constructs a CST (Concrete Syntax Tree). -// Even if grammar `gram` contains `#ast` directives, the new SyntaxTreeActionSet this function returns ignores them. -func NewCSTActionSet(gram Grammar, builder SyntaxTreeBuilder) *SyntaxTreeActionSet { - return &SyntaxTreeActionSet{ - gram: gram, - builder: builder, - semStack: newSemanticStack(), - disableASTAction: true, - } -} - -// Shift is a implementation of SemanticActionSet.Shift method. -func (a *SyntaxTreeActionSet) Shift(tok VToken, recovered bool) { - term := a.tokenToTerminal(tok) - row, col := tok.Position() - a.semStack.push(a.builder.Shift(a.gram.Terminal(term), string(tok.Lexeme()), row, col)) -} - -// Reduce is a implementation of SemanticActionSet.Reduce method. -func (a *SyntaxTreeActionSet) Reduce(prodNum int, recovered bool) { - lhs := a.gram.LHS(prodNum) - - // When an alternative is empty, `n` will be 0, and `handle` will be empty slice. - n := a.gram.AlternativeSymbolCount(prodNum) - handle := a.semStack.pop(n) - - var astAct []int - if !a.disableASTAction { - astAct = a.gram.ASTAction(prodNum) - } - var children []SyntaxTreeNode - if astAct != nil { - // Count the number of children in advance to avoid frequent growth in a slice for children. - { - l := 0 - for _, e := range astAct { - if e > 0 { - l++ - } else { - offset := e*-1 - 1 - l += handle[offset].ChildCount() - } - } - - children = make([]SyntaxTreeNode, l) - } - - p := 0 - for _, e := range astAct { - if e > 0 { - offset := e - 1 - children[p] = handle[offset] - p++ - } else { - offset := e*-1 - 1 - for _, c := range handle[offset].ExpandChildren() { - children[p] = c - p++ - } - } - } - } else { - // If an alternative has no AST action, a driver generates - // a node with the same structure as a CST. - children = handle - } - - a.semStack.push(a.builder.Reduce(a.gram.NonTerminal(lhs), children)) -} - -// Accept is a implementation of SemanticActionSet.Accept method. -func (a *SyntaxTreeActionSet) Accept() { - top := a.semStack.pop(1) - a.builder.Accept(top[0]) -} - -// TrapAndShiftError is a implementation of SemanticActionSet.TrapAndShiftError method. -func (a *SyntaxTreeActionSet) TrapAndShiftError(cause VToken, popped int) { - a.semStack.pop(popped) - a.semStack.push(a.builder.ShiftError(a.gram.Terminal(a.gram.Error()))) -} - -// MissError is a implementation of SemanticActionSet.MissError method. -func (a *SyntaxTreeActionSet) MissError(cause VToken) { -} - -func (a *SyntaxTreeActionSet) tokenToTerminal(tok VToken) int { - if tok.EOF() { - return a.gram.EOF() - } - - return tok.TerminalID() -} - -type semanticStack struct { - frames []SyntaxTreeNode -} - -func newSemanticStack() *semanticStack { - return &semanticStack{ - frames: make([]SyntaxTreeNode, 0, 100), - } -} - -func (s *semanticStack) push(f SyntaxTreeNode) { - s.frames = append(s.frames, f) -} - -func (s *semanticStack) pop(n int) []SyntaxTreeNode { - fs := s.frames[len(s.frames)-n:] - s.frames = s.frames[:len(s.frames)-n] - - return fs -} - -type NodeType int - -const ( - NodeTypeError = 0 - NodeTypeTerminal = 1 - NodeTypeNonTerminal = 2 -) - -// Node is a implementation of SyntaxTreeNode interface. -type Node struct { - Type NodeType - KindName string - Text string - Row int - Col int - Children []*Node -} - -func (n *Node) MarshalJSON() ([]byte, error) { - switch n.Type { - case NodeTypeError: - return json.Marshal(struct { - Type NodeType `json:"type"` - KindName string `json:"kind_name"` - }{ - Type: n.Type, - KindName: n.KindName, - }) - case NodeTypeTerminal: - if n.KindName == "" { - return json.Marshal(struct { - Type NodeType `json:"type"` - Text string `json:"text"` - Row int `json:"row"` - Col int `json:"col"` - }{ - Type: n.Type, - Text: n.Text, - Row: n.Row, - Col: n.Col, - }) - } - return json.Marshal(struct { - Type NodeType `json:"type"` - KindName string `json:"kind_name"` - Text string `json:"text"` - Row int `json:"row"` - Col int `json:"col"` - }{ - Type: n.Type, - KindName: n.KindName, - Text: n.Text, - Row: n.Row, - Col: n.Col, - }) - case NodeTypeNonTerminal: - return json.Marshal(struct { - Type NodeType `json:"type"` - KindName string `json:"kind_name"` - Children []*Node `json:"children"` - }{ - Type: n.Type, - KindName: n.KindName, - Children: n.Children, - }) - default: - return nil, fmt.Errorf("invalid node type: %v", n.Type) - } -} - -// ChildCount is a implementation of SyntaxTreeNode.ChildCount. -func (n *Node) ChildCount() int { - return len(n.Children) -} - -// ExpandChildren is a implementation of SyntaxTreeNode.ExpandChildren. -func (n *Node) ExpandChildren() []SyntaxTreeNode { - fs := make([]SyntaxTreeNode, len(n.Children)) - for i, n := range n.Children { - fs[i] = n - } - return fs -} - -// PrintTree prints a syntax tree whose root is `node`. -func PrintTree(w io.Writer, node *Node) { - printTree(w, node, "", "") -} - -func printTree(w io.Writer, node *Node, ruledLine string, childRuledLinePrefix string) { - if node == nil { - return - } - - switch node.Type { - case NodeTypeError: - fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) - case NodeTypeTerminal: - fmt.Fprintf(w, "%v%v %v\n", ruledLine, node.KindName, strconv.Quote(node.Text)) - case NodeTypeNonTerminal: - fmt.Fprintf(w, "%v%v\n", ruledLine, node.KindName) - - num := len(node.Children) - for i, child := range node.Children { - var line string - if num > 1 && i < num-1 { - line = "├─ " - } else { - line = "└─ " - } - - var prefix string - if i >= num-1 { - prefix = " " - } else { - prefix = "│ " - } - - printTree(w, child, childRuledLinePrefix+line, childRuledLinePrefix+prefix) - } - } -} diff --git a/src/urubu/spec/test/tree-report.json b/src/urubu/spec/tree-report.json index c2018e5..c2018e5 100644 --- a/src/urubu/spec/test/tree-report.json +++ b/src/urubu/spec/tree-report.json diff --git a/src/urubu/spec/test/tree.json b/src/urubu/spec/tree.json index f05c2f2..f05c2f2 100644 --- a/src/urubu/spec/test/tree.json +++ b/src/urubu/spec/tree.json diff --git a/src/urubu/spec/test/tree.vartan b/src/urubu/spec/tree.vartan index aa8f733..aa8f733 100644 --- a/src/urubu/spec/test/tree.vartan +++ b/src/urubu/spec/tree.vartan |