Diffstat (limited to 'src/urubu/spec/grammar')
-rw-r--r--  src/urubu/spec/grammar/clexspec.json (renamed from src/urubu/spec/grammar/parser/clexspec.json)  |    0
-rw-r--r--  src/urubu/spec/grammar/description.go                                                            |   71
-rw-r--r--  src/urubu/spec/grammar/grammar.go                                                                |  160
-rw-r--r--  src/urubu/spec/grammar/lexspec.json (renamed from src/urubu/spec/grammar/parser/lexspec.json)    |    0
-rw-r--r--  src/urubu/spec/grammar/parser.go (renamed from src/urubu/spec/grammar/parser/vartan_lexer.go)    |  911
-rw-r--r--  src/urubu/spec/grammar/parser/lexer.go                                                           |  297
-rw-r--r--  src/urubu/spec/grammar/parser/parser.go                                                          |  582
-rw-r--r--  src/urubu/spec/grammar/parser/syntax_error.go                                                    |   45
-rw-r--r--  src/urubu/spec/grammar/util.go                                                                   |   21
9 files changed, 910 insertions, 1177 deletions
diff --git a/src/urubu/spec/grammar/parser/clexspec.json b/src/urubu/spec/grammar/clexspec.json
index d0ed3d3..d0ed3d3 100644
--- a/src/urubu/spec/grammar/parser/clexspec.json
+++ b/src/urubu/spec/grammar/clexspec.json
diff --git a/src/urubu/spec/grammar/description.go b/src/urubu/spec/grammar/description.go
deleted file mode 100644
index 0d2a0b7..0000000
--- a/src/urubu/spec/grammar/description.go
+++ /dev/null
@@ -1,71 +0,0 @@
-package grammar
-
-type Terminal struct {
- Number int `json:"number"`
- Name string `json:"name"`
- Pattern string `json:"pattern"`
- Precedence int `json:"prec"`
- Associativity string `json:"assoc"`
-}
-
-type NonTerminal struct {
- Number int `json:"number"`
- Name string `json:"name"`
-}
-
-type Production struct {
- Number int `json:"number"`
- LHS int `json:"lhs"`
- RHS []int `json:"rhs"`
- Precedence int `json:"prec"`
- Associativity string `json:"assoc"`
-}
-
-type Item struct {
- Production int `json:"production"`
- Dot int `json:"dot"`
-}
-
-type Transition struct {
- Symbol int `json:"symbol"`
- State int `json:"state"`
-}
-
-type Reduce struct {
- LookAhead []int `json:"look_ahead"`
- Production int `json:"production"`
-}
-
-type SRConflict struct {
- Symbol int `json:"symbol"`
- State int `json:"state"`
- Production int `json:"production"`
- AdoptedState *int `json:"adopted_state"`
- AdoptedProduction *int `json:"adopted_production"`
- ResolvedBy int `json:"resolved_by"`
-}
-
-type RRConflict struct {
- Symbol int `json:"symbol"`
- Production1 int `json:"production_1"`
- Production2 int `json:"production_2"`
- AdoptedProduction int `json:"adopted_production"`
- ResolvedBy int `json:"resolved_by"`
-}
-
-type State struct {
- Number int `json:"number"`
- Kernel []*Item `json:"kernel"`
- Shift []*Transition `json:"shift"`
- Reduce []*Reduce `json:"reduce"`
- GoTo []*Transition `json:"goto"`
- SRConflict []*SRConflict `json:"sr_conflict"`
- RRConflict []*RRConflict `json:"rr_conflict"`
-}
-
-type Report struct {
- Terminals []*Terminal `json:"terminals"`
- NonTerminals []*NonTerminal `json:"non_terminals"`
- Productions []*Production `json:"productions"`
- States []*State `json:"states"`
-}
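
The struct tags in the removed description.go fully determine the report's JSON wire format. As a minimal, hypothetical sketch (not part of this commit), a terminal entry round-trips through encoding/json like so:

    package main

    import (
        "encoding/json"
        "fmt"
    )

    // Terminal mirrors the struct deleted above; the tags drive the field names.
    type Terminal struct {
        Number        int    `json:"number"`
        Name          string `json:"name"`
        Pattern       string `json:"pattern"`
        Precedence    int    `json:"prec"`
        Associativity string `json:"assoc"`
    }

    func main() {
        src := []byte(`{"number":1,"name":"add","pattern":"\\+","prec":1,"assoc":"l"}`)
        var t Terminal
        if err := json.Unmarshal(src, &t); err != nil {
            panic(err)
        }
        // Prints: terminal add matches /\+/ (prec 1, assoc "l")
        fmt.Printf("terminal %s matches /%s/ (prec %d, assoc %q)\n",
            t.Name, t.Pattern, t.Precedence, t.Associativity)
    }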
diff --git a/src/urubu/spec/grammar/grammar.go b/src/urubu/spec/grammar/grammar.go
deleted file mode 100644
index bf1ea89..0000000
--- a/src/urubu/spec/grammar/grammar.go
+++ /dev/null
@@ -1,160 +0,0 @@
-package grammar
-
-import "strconv"
-
-type CompiledGrammar struct {
- Name string `json:"name"`
- Lexical *LexicalSpec `json:"lexical"`
- Syntactic *SyntacticSpec `json:"syntactic"`
- ASTAction *ASTAction `json:"ast_action"`
-}
-
-// StateID represents an ID of a state of a transition table.
-type StateID int
-
-const (
- // StateIDNil represents an empty entry of a transition table.
- // When the driver reads this value, it raises an error meaning lexical analysis failed.
- StateIDNil = StateID(0)
-
- // StateIDMin is the minimum value of the state ID. All valid state IDs are represented as
- // sequential numbers starting from this value.
- StateIDMin = StateID(1)
-)
-
-func (id StateID) Int() int {
- return int(id)
-}
-
-// LexModeID represents an ID of a lex mode.
-type LexModeID int
-
-const (
- LexModeIDNil = LexModeID(0)
- LexModeIDDefault = LexModeID(1)
-)
-
-func (n LexModeID) String() string {
- return strconv.Itoa(int(n))
-}
-
-func (n LexModeID) Int() int {
- return int(n)
-}
-
-func (n LexModeID) IsNil() bool {
- return n == LexModeIDNil
-}
-
-// LexModeName represents a name of a lex mode.
-type LexModeName string
-
-const (
- LexModeNameNil = LexModeName("")
- LexModeNameDefault = LexModeName("default")
-)
-
-func (m LexModeName) String() string {
- return string(m)
-}
-
-// LexKindID represents an ID of a lexical kind and is unique across all modes.
-type LexKindID int
-
-const (
- LexKindIDNil = LexKindID(0)
- LexKindIDMin = LexKindID(1)
-)
-
-func (id LexKindID) Int() int {
- return int(id)
-}
-
-// LexModeKindID represents an ID of a lexical kind and is unique within a mode.
-// Use LexKindID to identify a kind across all modes uniquely.
-type LexModeKindID int
-
-const (
- LexModeKindIDNil = LexModeKindID(0)
- LexModeKindIDMin = LexModeKindID(1)
-)
-
-func (id LexModeKindID) Int() int {
- return int(id)
-}
-
-// LexKindName represents a name of a lexical kind.
-type LexKindName string
-
-const LexKindNameNil = LexKindName("")
-
-func (k LexKindName) String() string {
- return string(k)
-}
-
-type RowDisplacementTable struct {
- OriginalRowCount int `json:"original_row_count"`
- OriginalColCount int `json:"original_col_count"`
- EmptyValue StateID `json:"empty_value"`
- Entries []StateID `json:"entries"`
- Bounds []int `json:"bounds"`
- RowDisplacement []int `json:"row_displacement"`
-}
-
-type UniqueEntriesTable struct {
- UniqueEntries *RowDisplacementTable `json:"unique_entries,omitempty"`
- UncompressedUniqueEntries []StateID `json:"uncompressed_unique_entries,omitempty"`
- RowNums []int `json:"row_nums"`
- OriginalRowCount int `json:"original_row_count"`
- OriginalColCount int `json:"original_col_count"`
- EmptyValue int `json:"empty_value"`
-}
-
-type TransitionTable struct {
- InitialStateID StateID `json:"initial_state_id"`
- AcceptingStates []LexModeKindID `json:"accepting_states"`
- RowCount int `json:"row_count"`
- ColCount int `json:"col_count"`
- Transition *UniqueEntriesTable `json:"transition,omitempty"`
- UncompressedTransition []StateID `json:"uncompressed_transition,omitempty"`
-}
-
-type CompiledLexModeSpec struct {
- KindNames []LexKindName `json:"kind_names"`
- Push []LexModeID `json:"push"`
- Pop []int `json:"pop"`
- DFA *TransitionTable `json:"dfa"`
-}
-
-type LexicalSpec struct {
- InitialModeID LexModeID `json:"initial_mode_id"`
- ModeNames []LexModeName `json:"mode_names"`
- KindNames []LexKindName `json:"kind_names"`
- KindIDs [][]LexKindID `json:"kind_ids"`
- CompressionLevel int `json:"compression_level"`
- Specs []*CompiledLexModeSpec `json:"specs"`
-}
-
-type SyntacticSpec struct {
- Action []int `json:"action"`
- GoTo []int `json:"goto"`
- StateCount int `json:"state_count"`
- InitialState int `json:"initial_state"`
- StartProduction int `json:"start_production"`
- LHSSymbols []int `json:"lhs_symbols"`
- AlternativeSymbolCounts []int `json:"alternative_symbol_counts"`
- Terminals []string `json:"terminals"`
- TerminalCount int `json:"terminal_count"`
- TerminalSkip []int `json:"terminal_skip"`
- KindToTerminal []int `json:"kind_to_terminal"`
- NonTerminals []string `json:"non_terminals"`
- NonTerminalCount int `json:"non_terminal_count"`
- EOFSymbol int `json:"eof_symbol"`
- ErrorSymbol int `json:"error_symbol"`
- ErrorTrapperStates []int `json:"error_trapper_states"`
- RecoverProductions []int `json:"recover_productions"`
-}
-
-type ASTAction struct {
- Entries [][]int `json:"entries"`
-}
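
RowDisplacementTable above compresses a sparse transition table by overlapping its rows into one Entries array, with Bounds recording which original row owns each slot. The lookup below is an illustrative sketch of the assumed access pattern (the actual driver code is not part of this diff):

    package main

    import "fmt"

    type StateID int

    // Trimmed to the fields the lookup needs; see the full struct above.
    type RowDisplacementTable struct {
        EmptyValue      StateID
        Entries         []StateID
        Bounds          []int
        RowDisplacement []int
    }

    // lookup returns the entry for (row, col), or EmptyValue when the slot
    // belongs to a different row, i.e. the original cell was empty.
    func lookup(t *RowDisplacementTable, row, col int) StateID {
        i := t.RowDisplacement[row] + col
        if i < 0 || i >= len(t.Entries) || t.Bounds[i] != row {
            return t.EmptyValue // StateIDNil in the full spec
        }
        return t.Entries[i]
    }

    func main() {
        // A 2x3 table, rows [0 5 0] and [0 0 7], packed with displacements 0 and 1.
        t := &RowDisplacementTable{
            Entries:         []StateID{0, 5, 0, 7},
            Bounds:          []int{0, 0, 0, 1},
            RowDisplacement: []int{0, 1},
        }
        fmt.Println(lookup(t, 0, 1), lookup(t, 1, 2), lookup(t, 1, 0)) // 5 7 0
    }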
diff --git a/src/urubu/spec/grammar/parser/lexspec.json b/src/urubu/spec/grammar/lexspec.json
index caf1f0e..caf1f0e 100644
--- a/src/urubu/spec/grammar/parser/lexspec.json
+++ b/src/urubu/spec/grammar/lexspec.json
diff --git a/src/urubu/spec/grammar/parser/vartan_lexer.go b/src/urubu/spec/grammar/parser.go
index 76ddfde..0e5a16b 100644
--- a/src/urubu/spec/grammar/parser/vartan_lexer.go
+++ b/src/urubu/spec/grammar/parser.go
@@ -1,11 +1,920 @@
-// Code generated by maleeni-go. DO NOT EDIT.
+//go:generate maleeni compile lexspec.json -o clexspec.json
+//go:generate maleeni-go clexspec.json --package parser
+
package parser
import (
+ _ "embed"
"fmt"
"io"
"io/ioutil"
+ "regexp"
+ "strings"
+
+ verr "urubu/error"
+ spec "urubu/spec/grammar"
+)
+
+type tokenKind string
+
+const (
+ tokenKindKWFragment = tokenKind("fragment")
+ tokenKindID = tokenKind("id")
+ tokenKindTerminalPattern = tokenKind("terminal pattern")
+ tokenKindStringLiteral = tokenKind("string")
+ tokenKindColon = tokenKind(":")
+ tokenKindOr = tokenKind("|")
+ tokenKindSemicolon = tokenKind(";")
+ tokenKindLabelMarker = tokenKind("@")
+ tokenKindDirectiveMarker = tokenKind("#")
+ tokenKindExpantion = tokenKind("...")
+ tokenKindOrderedSymbolMarker = tokenKind("$")
+ tokenKindLParen = tokenKind("(")
+ tokenKindRParen = tokenKind(")")
+ tokenKindNewline = tokenKind("newline")
+ tokenKindEOF = tokenKind("eof")
+ tokenKindInvalid = tokenKind("invalid")
+)
+
+var (
+ reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`)
+ reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`)
+)
+
+type Position struct {
+ Row int
+ Col int
+}
+
+func newPosition(row, col int) Position {
+ return Position{
+ Row: row,
+ Col: col,
+ }
+}
+
+type token struct {
+ kind tokenKind
+ text string
+ pos Position
+}
+
+func newSymbolToken(kind tokenKind, pos Position) *token {
+ return &token{
+ kind: kind,
+ pos: pos,
+ }
+}
+
+func newIDToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindID,
+ text: text,
+ pos: pos,
+ }
+}
+
+func newTerminalPatternToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindTerminalPattern,
+ text: text,
+ pos: pos,
+ }
+}
+
+func newStringLiteralToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindStringLiteral,
+ text: text,
+ pos: pos,
+ }
+}
+
+func newEOFToken() *token {
+ return &token{
+ kind: tokenKindEOF,
+ }
+}
+
+func newInvalidToken(text string, pos Position) *token {
+ return &token{
+ kind: tokenKindInvalid,
+ text: text,
+ pos: pos,
+ }
+}
+
+type lexer struct {
+ d *Lexer
+ buf *token
+}
+
+func newLexer(src io.Reader) (*lexer, error) {
+ d, err := NewLexer(NewLexSpec(), src)
+ if err != nil {
+ return nil, err
+ }
+ return &lexer{
+ d: d,
+ }, nil
+}
+
+func (l *lexer) next() (*token, error) {
+ if l.buf != nil {
+ tok := l.buf
+ l.buf = nil
+ return tok, nil
+ }
+
+ var newline *token
+ for {
+ tok, err := l.lexAndSkipWSs()
+ if err != nil {
+ return nil, err
+ }
+ if tok.kind == tokenKindNewline {
+ newline = tok
+ continue
+ }
+
+ if newline != nil {
+ l.buf = tok
+ return newline, nil
+ }
+ return tok, nil
+ }
+}
+
+func (l *lexer) lexAndSkipWSs() (*token, error) {
+ var tok *Token
+ for {
+ var err error
+ tok, err = l.d.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.Invalid {
+ return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+ if tok.EOF {
+ return newEOFToken(), nil
+ }
+ switch tok.KindID {
+ case KindIDWhiteSpace:
+ continue
+ case KindIDLineComment:
+ continue
+ }
+
+ break
+ }
+
+ switch tok.KindID {
+ case KindIDNewline:
+ return newSymbolToken(tokenKindNewline, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDKwFragment:
+ return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDIdentifier:
+ if !reIDChar.Match(tok.Lexeme) {
+ return nil, &verr.SpecError{
+ Cause: synErrIDInvalidChar,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") {
+ return nil, &verr.SpecError{
+ Cause: synErrIDInvalidUnderscorePos,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ if strings.Contains(string(tok.Lexeme), "__") {
+ return nil, &verr.SpecError{
+ Cause: synErrIDConsecutiveUnderscores,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ if reIDInvalidDigitsPos.Match(tok.Lexeme) {
+ return nil, &verr.SpecError{
+ Cause: synErrIDInvalidDigitsPos,
+ Detail: string(tok.Lexeme),
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ return newIDToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDTerminalOpen:
+ var b strings.Builder
+ for {
+ tok, err := l.d.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.EOF {
+ return nil, &verr.SpecError{
+ Cause: synErrUnclosedTerminal,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ switch tok.KindID {
+ case KindIDPattern:
+ // The escape sequences in a pattern string are interpreted by the lexer, except for the \".
+ // We must interpret the \" before passing them to the lexer because they are delimiters for
+ // the pattern strings.
+ fmt.Fprint(&b, strings.ReplaceAll(string(tok.Lexeme), `\"`, `"`))
+ case KindIDEscapeSymbol:
+ return nil, &verr.SpecError{
+ Cause: synErrIncompletedEscSeq,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ case KindIDTerminalClose:
+ pat := b.String()
+ if pat == "" {
+ return nil, &verr.SpecError{
+ Cause: synErrEmptyPattern,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ return newTerminalPatternToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+ }
+ case KindIDStringLiteralOpen:
+ var b strings.Builder
+ for {
+ tok, err := l.d.Next()
+ if err != nil {
+ return nil, err
+ }
+ if tok.EOF {
+ return nil, &verr.SpecError{
+ Cause: synErrUnclosedString,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ switch tok.KindID {
+ case KindIDCharSeq:
+ fmt.Fprint(&b, string(tok.Lexeme))
+ case KindIDStringLiteralClose:
+ str := b.String()
+ if str == "" {
+ return nil, &verr.SpecError{
+ Cause: synErrEmptyString,
+ Row: tok.Row + 1,
+ Col: tok.Col + 1,
+ }
+ }
+ return newStringLiteralToken(str, newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+ }
+ case KindIDColon:
+ return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDOr:
+ return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDSemicolon:
+ return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDLabelMarker:
+ return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDDirectiveMarker:
+ return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDExpansion:
+ return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDOrderedSymbolMarker:
+ return newSymbolToken(tokenKindOrderedSymbolMarker, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDLParen:
+ return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDRParen:
+ return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil
+ default:
+ return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
+ }
+}
+
+type RootNode struct {
+ Directives []*DirectiveNode
+ Productions []*ProductionNode
+ LexProductions []*ProductionNode
+ Fragments []*FragmentNode
+}
+
+type ProductionNode struct {
+ Directives []*DirectiveNode
+ LHS string
+ RHS []*AlternativeNode
+ Pos Position
+}
+
+func (n *ProductionNode) isLexical() bool {
+ if len(n.RHS) == 1 && len(n.RHS[0].Elements) == 1 && n.RHS[0].Elements[0].Pattern != "" {
+ return true
+ }
+ return false
+}
+
+type AlternativeNode struct {
+ Elements []*ElementNode
+ Directives []*DirectiveNode
+ Pos Position
+}
+
+type ElementNode struct {
+ ID string
+ Pattern string
+ Label *LabelNode
+ Literally bool
+ Pos Position
+}
+
+type LabelNode struct {
+ Name string
+ Pos Position
+}
+
+type DirectiveNode struct {
+ Name string
+ Parameters []*ParameterNode
+ Pos Position
+}
+
+type ParameterNode struct {
+ ID string
+ Pattern string
+ String string
+ OrderedSymbol string
+ Group []*DirectiveNode
+ Expansion bool
+ Pos Position
+}
+
+type FragmentNode struct {
+ LHS string
+ RHS string
+ Pos Position
+}
+
+func raiseSyntaxError(row int, synErr *SyntaxError) {
+ panic(&verr.SpecError{
+ Cause: synErr,
+ Row: row,
+ })
+}
+
+func raiseSyntaxErrorWithDetail(row int, synErr *SyntaxError, detail string) {
+ panic(&verr.SpecError{
+ Cause: synErr,
+ Detail: detail,
+ Row: row,
+ })
+}
+
+func Parse(src io.Reader) (*RootNode, error) {
+ p, err := newParser(src)
+ if err != nil {
+ return nil, err
+ }
+
+ return p.parse()
+}
+
+type parser struct {
+ lex *lexer
+ peekedTok *token
+ lastTok *token
+ errs verr.SpecErrors
+
+ // A token position that the parser read at last.
+ // It is used as additional information in error messages.
+ pos Position
+}
+
+func newParser(src io.Reader) (*parser, error) {
+ lex, err := newLexer(src)
+ if err != nil {
+ return nil, err
+ }
+ return &parser{
+ lex: lex,
+ }, nil
+}
+
+func (p *parser) parse() (root *RootNode, retErr error) {
+ root = p.parseRoot()
+ if len(p.errs) > 0 {
+ return nil, p.errs
+ }
+
+ return root, nil
+}
+
+func (p *parser) parseRoot() *RootNode {
+ defer func() {
+ err := recover()
+ if err != nil {
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(fmt.Errorf("an unexpected error occurred: %v", err))
+ }
+ p.errs = append(p.errs, specErr)
+ }
+ }()
+
+ var dirs []*DirectiveNode
+ var prods []*ProductionNode
+ var lexProds []*ProductionNode
+ var fragments []*FragmentNode
+ for {
+ dir := p.parseTopLevelDirective()
+ if dir != nil {
+ dirs = append(dirs, dir)
+ continue
+ }
+
+ fragment := p.parseFragment()
+ if fragment != nil {
+ fragments = append(fragments, fragment)
+ continue
+ }
+
+ prod := p.parseProduction()
+ if prod != nil {
+ if prod.isLexical() {
+ lexProds = append(lexProds, prod)
+ } else {
+ prods = append(prods, prod)
+ }
+ continue
+ }
+
+ if p.consume(tokenKindEOF) {
+ break
+ }
+ }
+
+ return &RootNode{
+ Directives: dirs,
+ Productions: prods,
+ LexProductions: lexProds,
+ Fragments: fragments,
+ }
+}
+
+func (p *parser) parseTopLevelDirective() *DirectiveNode {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(err)
+ }
+
+ p.errs = append(p.errs, specErr)
+ p.skipOverTo(tokenKindSemicolon)
+ }()
+
+ dir := p.parseDirective()
+ if dir == nil {
+ return nil
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindSemicolon) {
+ raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon)
+ }
+
+ return dir
+}
+
+func (p *parser) parseFragment() *FragmentNode {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(err)
+ }
+
+ p.errs = append(p.errs, specErr)
+ p.skipOverTo(tokenKindSemicolon)
+ }()
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindKWFragment) {
+ return nil
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoProductionName)
+ }
+ lhs := p.lastTok.text
+ lhsPos := p.lastTok.pos
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindColon) {
+ raiseSyntaxError(p.pos.Row, synErrNoColon)
+ }
+
+ var rhs string
+ switch {
+ case p.consume(tokenKindTerminalPattern):
+ rhs = p.lastTok.text
+ case p.consume(tokenKindStringLiteral):
+ rhs = spec.EscapePattern(p.lastTok.text)
+ default:
+ raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern)
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindSemicolon) {
+ raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
+ }
+
+ if !p.consume(tokenKindNewline) {
+ if !p.consume(tokenKindEOF) {
+ raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
+ }
+ }
+
+ return &FragmentNode{
+ LHS: lhs,
+ RHS: rhs,
+ Pos: lhsPos,
+ }
+}
+
+func (p *parser) parseProduction() *ProductionNode {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ specErr, ok := err.(*verr.SpecError)
+ if !ok {
+ panic(err)
+ }
+
+ p.errs = append(p.errs, specErr)
+ p.skipOverTo(tokenKindSemicolon)
+ }()
+
+ p.consume(tokenKindNewline)
+
+ if p.consume(tokenKindEOF) {
+ return nil
+ }
+
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoProductionName)
+ }
+ lhs := p.lastTok.text
+ lhsPos := p.lastTok.pos
+
+ var dirs []*DirectiveNode
+ for {
+ dir := p.parseDirective()
+ if dir == nil {
+ break
+ }
+ dirs = append(dirs, dir)
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindColon) {
+ raiseSyntaxError(p.pos.Row, synErrNoColon)
+ }
+
+ alt := p.parseAlternative()
+ rhs := []*AlternativeNode{alt}
+ for {
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindOr) {
+ break
+ }
+ alt := p.parseAlternative()
+ rhs = append(rhs, alt)
+ }
+
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindSemicolon) {
+ raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
+ }
+
+ if !p.consume(tokenKindNewline) {
+ if !p.consume(tokenKindEOF) {
+ raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
+ }
+ }
+
+ prod := &ProductionNode{
+ Directives: dirs,
+ LHS: lhs,
+ RHS: rhs,
+ Pos: lhsPos,
+ }
+
+ // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs.
+ // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate
+ // name to the pattern. Therefore, this code prohibits alternatives from containing patterns.
+ if !prod.isLexical() {
+ for _, alt := range prod.RHS {
+ for _, elem := range alt.Elements {
+ if elem.Pattern != "" {
+ raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt)
+ }
+ }
+ }
+ }
+
+ return prod
+}
+
+func (p *parser) parseAlternative() *AlternativeNode {
+ elems := []*ElementNode{}
+ for {
+ elem := p.parseElement()
+ if elem == nil {
+ break
+ }
+ elems = append(elems, elem)
+ }
+
+ // When a length of an alternative is zero, we cannot set a position.
+ var firstElemPos Position
+ if len(elems) > 0 {
+ firstElemPos = elems[0].Pos
+ }
+
+ var dirs []*DirectiveNode
+ for {
+ dir := p.parseDirective()
+ if dir == nil {
+ break
+ }
+ dirs = append(dirs, dir)
+ }
+
+ return &AlternativeNode{
+ Elements: elems,
+ Directives: dirs,
+ Pos: firstElemPos,
+ }
+}
+
+func (p *parser) parseElement() *ElementNode {
+ var elem *ElementNode
+ switch {
+ case p.consume(tokenKindID):
+ elem = &ElementNode{
+ ID: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindTerminalPattern):
+ elem = &ElementNode{
+ Pattern: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindStringLiteral):
+ elem = &ElementNode{
+ Pattern: p.lastTok.text,
+ Literally: true,
+ Pos: p.lastTok.pos,
+ }
+ default:
+ if p.consume(tokenKindLabelMarker) {
+ raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol)
+ }
+ return nil
+ }
+ if p.consume(tokenKindLabelMarker) {
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoLabel)
+ }
+ elem.Label = &LabelNode{
+ Name: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ }
+ return elem
+}
+
+func (p *parser) parseDirective() *DirectiveNode {
+ p.consume(tokenKindNewline)
+
+ if !p.consume(tokenKindDirectiveMarker) {
+ return nil
+ }
+ dirPos := p.lastTok.pos
+
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoDirectiveName)
+ }
+ name := p.lastTok.text
+
+ var params []*ParameterNode
+ for {
+ param := p.parseParameter()
+ if param == nil {
+ break
+ }
+ params = append(params, param)
+ }
+
+ return &DirectiveNode{
+ Name: name,
+ Parameters: params,
+ Pos: dirPos,
+ }
+}
+
+func (p *parser) parseParameter() *ParameterNode {
+ var param *ParameterNode
+ switch {
+ case p.consume(tokenKindID):
+ param = &ParameterNode{
+ ID: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindTerminalPattern):
+ param = &ParameterNode{
+ Pattern: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindStringLiteral):
+ param = &ParameterNode{
+ String: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindOrderedSymbolMarker):
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoOrderedSymbolName)
+ }
+ param = &ParameterNode{
+ OrderedSymbol: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
+ case p.consume(tokenKindLParen):
+ pos := p.lastTok.pos
+ var g []*DirectiveNode
+ for {
+ dir := p.parseDirective()
+ if dir == nil {
+ break
+ }
+ g = append(g, dir)
+ }
+ if !p.consume(tokenKindRParen) {
+ raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup)
+ }
+ if len(g) == 0 {
+ // Set an empty slice representing an empty directive group to distinguish between the following two cases.
+ //
+ // - #prec (); // vartan allows this case.
+ // - #prec; // This case will raise an error.
+ g = []*DirectiveNode{}
+ }
+ param = &ParameterNode{
+ Group: g,
+ Pos: pos,
+ }
+ }
+ if p.consume(tokenKindExpantion) {
+ switch {
+ case param == nil:
+ raiseSyntaxError(p.pos.Row, synErrStrayExpOp)
+ case param.ID == "":
+ raiseSyntaxError(p.pos.Row, synErrInvalidExpOperand)
+ }
+ param.Expansion = true
+ }
+ return param
+}
+
+func (p *parser) consume(expected tokenKind) bool {
+ var tok *token
+ var err error
+ if p.peekedTok != nil {
+ tok = p.peekedTok
+ p.peekedTok = nil
+ } else {
+ tok, err = p.lex.next()
+ if err != nil {
+ panic(err)
+ }
+ }
+ p.pos = tok.pos
+ if tok.kind == tokenKindInvalid {
+ raiseSyntaxErrorWithDetail(p.pos.Row, synErrInvalidToken, tok.text)
+ }
+ if tok.kind == expected {
+ p.lastTok = tok
+ return true
+ }
+ p.peekedTok = tok
+
+ return false
+}
+
+func (p *parser) skip() {
+ var tok *token
+ var err error
+ for {
+ if p.peekedTok != nil {
+ tok = p.peekedTok
+ p.peekedTok = nil
+ } else {
+ tok, err = p.lex.next()
+ if err != nil {
+ p.errs = append(p.errs, &verr.SpecError{
+ Cause: err,
+ Row: p.pos.Row,
+ })
+ continue
+ }
+ }
+
+ break
+ }
+
+ p.lastTok = tok
+ p.pos = tok.pos
+}
+
+func (p *parser) skipOverTo(kind tokenKind) {
+ for {
+ if p.consume(kind) || p.consume(tokenKindEOF) {
+ return
+ }
+ p.skip()
+ }
+}
+
+type SyntaxError struct {
+ message string
+}
+
+func newSyntaxError(message string) *SyntaxError {
+ return &SyntaxError{
+ message: message,
+ }
+}
+
+func (e *SyntaxError) Error() string {
+ return e.message
+}
+
+var (
+ // lexical errors
+ synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore")
+ synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier")
+ synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively")
+ synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the biginning of an identifier")
+ synErrUnclosedTerminal = newSyntaxError("unclosed terminal")
+ synErrUnclosedString = newSyntaxError("unclosed string")
+ synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash")
+ synErrEmptyPattern = newSyntaxError("a pattern must include at least one character")
+ synErrEmptyString = newSyntaxError("a string must include at least one character")
+
+ // syntax errors
+ synErrInvalidToken = newSyntaxError("invalid token")
+ synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;")
+ synErrNoProductionName = newSyntaxError("a production name is missing")
+ synErrNoColon = newSyntaxError("the colon must precede alternatives")
+ synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative")
+ synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol")
+ synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @")
+ synErrNoDirectiveName = newSyntaxError("a directive needs a name")
+ synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing")
+ synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )")
+ synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal")
+ synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier")
+ synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier")
+ synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline")
+ synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element")
)
+// Code generated by maleeni-go. DO NOT EDIT.
type ModeID int
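
The consolidated parser keeps Parse(io.Reader) (*RootNode, error) as its entry point. A hypothetical caller looks like this; the grammar text is illustrative, and the import path is an assumption (after this change the package clause, parser, no longer matches its directory, spec/grammar):

    package main

    import (
        "fmt"
        "log"
        "strings"

        parser "urubu/spec/grammar/parser" // assumed import path
    )

    func main() {
        // One syntactic production (s) and one lexical production (foo):
        // a single-pattern alternative is classified via isLexical().
        src := strings.NewReader(`
    s
        : foo
        ;

    foo: "foo";
    `)
        root, err := parser.Parse(src)
        if err != nil {
            log.Fatal(err)
        }
        for _, p := range root.Productions {
            fmt.Println("syntactic:", p.LHS)
        }
        for _, p := range root.LexProductions {
            fmt.Println("lexical:", p.LHS)
        }
    }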
diff --git a/src/urubu/spec/grammar/parser/lexer.go b/src/urubu/spec/grammar/parser/lexer.go
deleted file mode 100644
index bd8a24f..0000000
--- a/src/urubu/spec/grammar/parser/lexer.go
+++ /dev/null
@@ -1,297 +0,0 @@
-//go:generate maleeni compile lexspec.json -o clexspec.json
-//go:generate maleeni-go clexspec.json --package parser
-
-package parser
-
-import (
- _ "embed"
- "fmt"
- "io"
- "regexp"
- "strings"
-
- verr "urubu/error"
-)
-
-type tokenKind string
-
-const (
- tokenKindKWFragment = tokenKind("fragment")
- tokenKindID = tokenKind("id")
- tokenKindTerminalPattern = tokenKind("terminal pattern")
- tokenKindStringLiteral = tokenKind("string")
- tokenKindColon = tokenKind(":")
- tokenKindOr = tokenKind("|")
- tokenKindSemicolon = tokenKind(";")
- tokenKindLabelMarker = tokenKind("@")
- tokenKindDirectiveMarker = tokenKind("#")
- tokenKindExpantion = tokenKind("...")
- tokenKindOrderedSymbolMarker = tokenKind("$")
- tokenKindLParen = tokenKind("(")
- tokenKindRParen = tokenKind(")")
- tokenKindNewline = tokenKind("newline")
- tokenKindEOF = tokenKind("eof")
- tokenKindInvalid = tokenKind("invalid")
-)
-
-var (
- reIDChar = regexp.MustCompile(`^[0-9a-z_]+$`)
- reIDInvalidDigitsPos = regexp.MustCompile(`^[0-9]`)
-)
-
-type Position struct {
- Row int
- Col int
-}
-
-func newPosition(row, col int) Position {
- return Position{
- Row: row,
- Col: col,
- }
-}
-
-type token struct {
- kind tokenKind
- text string
- pos Position
-}
-
-func newSymbolToken(kind tokenKind, pos Position) *token {
- return &token{
- kind: kind,
- pos: pos,
- }
-}
-
-func newIDToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindID,
- text: text,
- pos: pos,
- }
-}
-
-func newTerminalPatternToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindTerminalPattern,
- text: text,
- pos: pos,
- }
-}
-
-func newStringLiteralToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindStringLiteral,
- text: text,
- pos: pos,
- }
-}
-
-func newEOFToken() *token {
- return &token{
- kind: tokenKindEOF,
- }
-}
-
-func newInvalidToken(text string, pos Position) *token {
- return &token{
- kind: tokenKindInvalid,
- text: text,
- pos: pos,
- }
-}
-
-type lexer struct {
- d *Lexer
- buf *token
-}
-
-func newLexer(src io.Reader) (*lexer, error) {
- d, err := NewLexer(NewLexSpec(), src)
- if err != nil {
- return nil, err
- }
- return &lexer{
- d: d,
- }, nil
-}
-
-func (l *lexer) next() (*token, error) {
- if l.buf != nil {
- tok := l.buf
- l.buf = nil
- return tok, nil
- }
-
- var newline *token
- for {
- tok, err := l.lexAndSkipWSs()
- if err != nil {
- return nil, err
- }
- if tok.kind == tokenKindNewline {
- newline = tok
- continue
- }
-
- if newline != nil {
- l.buf = tok
- return newline, nil
- }
- return tok, nil
- }
-}
-
-func (l *lexer) lexAndSkipWSs() (*token, error) {
- var tok *Token
- for {
- var err error
- tok, err = l.d.Next()
- if err != nil {
- return nil, err
- }
- if tok.Invalid {
- return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
- }
- if tok.EOF {
- return newEOFToken(), nil
- }
- switch tok.KindID {
- case KindIDWhiteSpace:
- continue
- case KindIDLineComment:
- continue
- }
-
- break
- }
-
- switch tok.KindID {
- case KindIDNewline:
- return newSymbolToken(tokenKindNewline, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDKwFragment:
- return newSymbolToken(tokenKindKWFragment, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDIdentifier:
- if !reIDChar.Match(tok.Lexeme) {
- return nil, &verr.SpecError{
- Cause: synErrIDInvalidChar,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- if strings.HasPrefix(string(tok.Lexeme), "_") || strings.HasSuffix(string(tok.Lexeme), "_") {
- return nil, &verr.SpecError{
- Cause: synErrIDInvalidUnderscorePos,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- if strings.Contains(string(tok.Lexeme), "__") {
- return nil, &verr.SpecError{
- Cause: synErrIDConsecutiveUnderscores,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- if reIDInvalidDigitsPos.Match(tok.Lexeme) {
- return nil, &verr.SpecError{
- Cause: synErrIDInvalidDigitsPos,
- Detail: string(tok.Lexeme),
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- return newIDToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDTerminalOpen:
- var b strings.Builder
- for {
- tok, err := l.d.Next()
- if err != nil {
- return nil, err
- }
- if tok.EOF {
- return nil, &verr.SpecError{
- Cause: synErrUnclosedTerminal,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- switch tok.KindID {
- case KindIDPattern:
- // The escape sequences in a pattern string are interpreted by the lexer, except for the \".
- // We must interpret the \" before passing them to the lexer because they are delimiters for
- // the pattern strings.
- fmt.Fprint(&b, strings.ReplaceAll(string(tok.Lexeme), `\"`, `"`))
- case KindIDEscapeSymbol:
- return nil, &verr.SpecError{
- Cause: synErrIncompletedEscSeq,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- case KindIDTerminalClose:
- pat := b.String()
- if pat == "" {
- return nil, &verr.SpecError{
- Cause: synErrEmptyPattern,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- return newTerminalPatternToken(pat, newPosition(tok.Row+1, tok.Col+1)), nil
- }
- }
- case KindIDStringLiteralOpen:
- var b strings.Builder
- for {
- tok, err := l.d.Next()
- if err != nil {
- return nil, err
- }
- if tok.EOF {
- return nil, &verr.SpecError{
- Cause: synErrUnclosedString,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- switch tok.KindID {
- case KindIDCharSeq:
- fmt.Fprint(&b, string(tok.Lexeme))
- case KindIDStringLiteralClose:
- str := b.String()
- if str == "" {
- return nil, &verr.SpecError{
- Cause: synErrEmptyString,
- Row: tok.Row + 1,
- Col: tok.Col + 1,
- }
- }
- return newStringLiteralToken(str, newPosition(tok.Row+1, tok.Col+1)), nil
- }
- }
- case KindIDColon:
- return newSymbolToken(tokenKindColon, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDOr:
- return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDSemicolon:
- return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDLabelMarker:
- return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDDirectiveMarker:
- return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDExpansion:
- return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDOrderedSymbolMarker:
- return newSymbolToken(tokenKindOrderedSymbolMarker, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDLParen:
- return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil
- case KindIDRParen:
- return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil
- default:
- return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil
- }
-}
diff --git a/src/urubu/spec/grammar/parser/parser.go b/src/urubu/spec/grammar/parser/parser.go
deleted file mode 100644
index b604074..0000000
--- a/src/urubu/spec/grammar/parser/parser.go
+++ /dev/null
@@ -1,582 +0,0 @@
-package parser
-
-import (
- "fmt"
- "io"
-
- verr "urubu/error"
- spec "urubu/spec/grammar"
-)
-
-type RootNode struct {
- Directives []*DirectiveNode
- Productions []*ProductionNode
- LexProductions []*ProductionNode
- Fragments []*FragmentNode
-}
-
-type ProductionNode struct {
- Directives []*DirectiveNode
- LHS string
- RHS []*AlternativeNode
- Pos Position
-}
-
-func (n *ProductionNode) isLexical() bool {
- if len(n.RHS) == 1 && len(n.RHS[0].Elements) == 1 && n.RHS[0].Elements[0].Pattern != "" {
- return true
- }
- return false
-}
-
-type AlternativeNode struct {
- Elements []*ElementNode
- Directives []*DirectiveNode
- Pos Position
-}
-
-type ElementNode struct {
- ID string
- Pattern string
- Label *LabelNode
- Literally bool
- Pos Position
-}
-
-type LabelNode struct {
- Name string
- Pos Position
-}
-
-type DirectiveNode struct {
- Name string
- Parameters []*ParameterNode
- Pos Position
-}
-
-type ParameterNode struct {
- ID string
- Pattern string
- String string
- OrderedSymbol string
- Group []*DirectiveNode
- Expansion bool
- Pos Position
-}
-
-type FragmentNode struct {
- LHS string
- RHS string
- Pos Position
-}
-
-func raiseSyntaxError(row int, synErr *SyntaxError) {
- panic(&verr.SpecError{
- Cause: synErr,
- Row: row,
- })
-}
-
-func raiseSyntaxErrorWithDetail(row int, synErr *SyntaxError, detail string) {
- panic(&verr.SpecError{
- Cause: synErr,
- Detail: detail,
- Row: row,
- })
-}
-
-func Parse(src io.Reader) (*RootNode, error) {
- p, err := newParser(src)
- if err != nil {
- return nil, err
- }
-
- return p.parse()
-}
-
-type parser struct {
- lex *lexer
- peekedTok *token
- lastTok *token
- errs verr.SpecErrors
-
- // A token position that the parser read at last.
- // It is used as additional information in error messages.
- pos Position
-}
-
-func newParser(src io.Reader) (*parser, error) {
- lex, err := newLexer(src)
- if err != nil {
- return nil, err
- }
- return &parser{
- lex: lex,
- }, nil
-}
-
-func (p *parser) parse() (root *RootNode, retErr error) {
- root = p.parseRoot()
- if len(p.errs) > 0 {
- return nil, p.errs
- }
-
- return root, nil
-}
-
-func (p *parser) parseRoot() *RootNode {
- defer func() {
- err := recover()
- if err != nil {
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(fmt.Errorf("an unexpected error occurred: %v", err))
- }
- p.errs = append(p.errs, specErr)
- }
- }()
-
- var dirs []*DirectiveNode
- var prods []*ProductionNode
- var lexProds []*ProductionNode
- var fragments []*FragmentNode
- for {
- dir := p.parseTopLevelDirective()
- if dir != nil {
- dirs = append(dirs, dir)
- continue
- }
-
- fragment := p.parseFragment()
- if fragment != nil {
- fragments = append(fragments, fragment)
- continue
- }
-
- prod := p.parseProduction()
- if prod != nil {
- if prod.isLexical() {
- lexProds = append(lexProds, prod)
- } else {
- prods = append(prods, prod)
- }
- continue
- }
-
- if p.consume(tokenKindEOF) {
- break
- }
- }
-
- return &RootNode{
- Directives: dirs,
- Productions: prods,
- LexProductions: lexProds,
- Fragments: fragments,
- }
-}
-
-func (p *parser) parseTopLevelDirective() *DirectiveNode {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(err)
- }
-
- p.errs = append(p.errs, specErr)
- p.skipOverTo(tokenKindSemicolon)
- }()
-
- dir := p.parseDirective()
- if dir == nil {
- return nil
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon)
- }
-
- return dir
-}
-
-func (p *parser) parseFragment() *FragmentNode {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(err)
- }
-
- p.errs = append(p.errs, specErr)
- p.skipOverTo(tokenKindSemicolon)
- }()
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindKWFragment) {
- return nil
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoProductionName)
- }
- lhs := p.lastTok.text
- lhsPos := p.lastTok.pos
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindColon) {
- raiseSyntaxError(p.pos.Row, synErrNoColon)
- }
-
- var rhs string
- switch {
- case p.consume(tokenKindTerminalPattern):
- rhs = p.lastTok.text
- case p.consume(tokenKindStringLiteral):
- rhs = spec.EscapePattern(p.lastTok.text)
- default:
- raiseSyntaxError(p.pos.Row, synErrFragmentNoPattern)
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
- }
-
- if !p.consume(tokenKindNewline) {
- if !p.consume(tokenKindEOF) {
- raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
- }
- }
-
- return &FragmentNode{
- LHS: lhs,
- RHS: rhs,
- Pos: lhsPos,
- }
-}
-
-func (p *parser) parseProduction() *ProductionNode {
- defer func() {
- err := recover()
- if err == nil {
- return
- }
-
- specErr, ok := err.(*verr.SpecError)
- if !ok {
- panic(err)
- }
-
- p.errs = append(p.errs, specErr)
- p.skipOverTo(tokenKindSemicolon)
- }()
-
- p.consume(tokenKindNewline)
-
- if p.consume(tokenKindEOF) {
- return nil
- }
-
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoProductionName)
- }
- lhs := p.lastTok.text
- lhsPos := p.lastTok.pos
-
- var dirs []*DirectiveNode
- for {
- dir := p.parseDirective()
- if dir == nil {
- break
- }
- dirs = append(dirs, dir)
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindColon) {
- raiseSyntaxError(p.pos.Row, synErrNoColon)
- }
-
- alt := p.parseAlternative()
- rhs := []*AlternativeNode{alt}
- for {
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindOr) {
- break
- }
- alt := p.parseAlternative()
- rhs = append(rhs, alt)
- }
-
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(p.pos.Row, synErrNoSemicolon)
- }
-
- if !p.consume(tokenKindNewline) {
- if !p.consume(tokenKindEOF) {
- raiseSyntaxError(p.pos.Row, synErrSemicolonNoNewline)
- }
- }
-
- prod := &ProductionNode{
- Directives: dirs,
- LHS: lhs,
- RHS: rhs,
- Pos: lhsPos,
- }
-
- // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs.
- // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate
- // name to the pattern. Therefore, this code prohibits alternatives from containing patterns.
- if !prod.isLexical() {
- for _, alt := range prod.RHS {
- for _, elem := range alt.Elements {
- if elem.Pattern != "" {
- raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt)
- }
- }
- }
- }
-
- return prod
-}
-
-func (p *parser) parseAlternative() *AlternativeNode {
- elems := []*ElementNode{}
- for {
- elem := p.parseElement()
- if elem == nil {
- break
- }
- elems = append(elems, elem)
- }
-
- // When a length of an alternative is zero, we cannot set a position.
- var firstElemPos Position
- if len(elems) > 0 {
- firstElemPos = elems[0].Pos
- }
-
- var dirs []*DirectiveNode
- for {
- dir := p.parseDirective()
- if dir == nil {
- break
- }
- dirs = append(dirs, dir)
- }
-
- return &AlternativeNode{
- Elements: elems,
- Directives: dirs,
- Pos: firstElemPos,
- }
-}
-
-func (p *parser) parseElement() *ElementNode {
- var elem *ElementNode
- switch {
- case p.consume(tokenKindID):
- elem = &ElementNode{
- ID: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindTerminalPattern):
- elem = &ElementNode{
- Pattern: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindStringLiteral):
- elem = &ElementNode{
- Pattern: p.lastTok.text,
- Literally: true,
- Pos: p.lastTok.pos,
- }
- default:
- if p.consume(tokenKindLabelMarker) {
- raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol)
- }
- return nil
- }
- if p.consume(tokenKindLabelMarker) {
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoLabel)
- }
- elem.Label = &LabelNode{
- Name: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- }
- return elem
-}
-
-func (p *parser) parseDirective() *DirectiveNode {
- p.consume(tokenKindNewline)
-
- if !p.consume(tokenKindDirectiveMarker) {
- return nil
- }
- dirPos := p.lastTok.pos
-
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoDirectiveName)
- }
- name := p.lastTok.text
-
- var params []*ParameterNode
- for {
- param := p.parseParameter()
- if param == nil {
- break
- }
- params = append(params, param)
- }
-
- return &DirectiveNode{
- Name: name,
- Parameters: params,
- Pos: dirPos,
- }
-}
-
-func (p *parser) parseParameter() *ParameterNode {
- var param *ParameterNode
- switch {
- case p.consume(tokenKindID):
- param = &ParameterNode{
- ID: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindTerminalPattern):
- param = &ParameterNode{
- Pattern: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindStringLiteral):
- param = &ParameterNode{
- String: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindOrderedSymbolMarker):
- if !p.consume(tokenKindID) {
- raiseSyntaxError(p.pos.Row, synErrNoOrderedSymbolName)
- }
- param = &ParameterNode{
- OrderedSymbol: p.lastTok.text,
- Pos: p.lastTok.pos,
- }
- case p.consume(tokenKindLParen):
- pos := p.lastTok.pos
- var g []*DirectiveNode
- for {
- dir := p.parseDirective()
- if dir == nil {
- break
- }
- g = append(g, dir)
- }
- if !p.consume(tokenKindRParen) {
- raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup)
- }
- if len(g) == 0 {
- // Set an empty slice representing an empty directive group to distinguish between the following two cases.
- //
- // - #prec (); // vartan allows this case.
- // - #prec; // This case will raise an error.
- g = []*DirectiveNode{}
- }
- param = &ParameterNode{
- Group: g,
- Pos: pos,
- }
- }
- if p.consume(tokenKindExpantion) {
- switch {
- case param == nil:
- raiseSyntaxError(p.pos.Row, synErrStrayExpOp)
- case param.ID == "":
- raiseSyntaxError(p.pos.Row, synErrInvalidExpOperand)
- }
- param.Expansion = true
- }
- return param
-}
-
-func (p *parser) consume(expected tokenKind) bool {
- var tok *token
- var err error
- if p.peekedTok != nil {
- tok = p.peekedTok
- p.peekedTok = nil
- } else {
- tok, err = p.lex.next()
- if err != nil {
- panic(err)
- }
- }
- p.pos = tok.pos
- if tok.kind == tokenKindInvalid {
- raiseSyntaxErrorWithDetail(p.pos.Row, synErrInvalidToken, tok.text)
- }
- if tok.kind == expected {
- p.lastTok = tok
- return true
- }
- p.peekedTok = tok
-
- return false
-}
-
-func (p *parser) skip() {
- var tok *token
- var err error
- for {
- if p.peekedTok != nil {
- tok = p.peekedTok
- p.peekedTok = nil
- } else {
- tok, err = p.lex.next()
- if err != nil {
- p.errs = append(p.errs, &verr.SpecError{
- Cause: err,
- Row: p.pos.Row,
- })
- continue
- }
- }
-
- break
- }
-
- p.lastTok = tok
- p.pos = tok.pos
-}
-
-func (p *parser) skipOverTo(kind tokenKind) {
- for {
- if p.consume(kind) || p.consume(tokenKindEOF) {
- return
- }
- p.skip()
- }
-}
diff --git a/src/urubu/spec/grammar/parser/syntax_error.go b/src/urubu/spec/grammar/parser/syntax_error.go
deleted file mode 100644
index 719fb94..0000000
--- a/src/urubu/spec/grammar/parser/syntax_error.go
+++ /dev/null
@@ -1,45 +0,0 @@
-package parser
-
-type SyntaxError struct {
- message string
-}
-
-func newSyntaxError(message string) *SyntaxError {
- return &SyntaxError{
- message: message,
- }
-}
-
-func (e *SyntaxError) Error() string {
- return e.message
-}
-
-var (
- // lexical errors
- synErrIDInvalidChar = newSyntaxError("an identifier can contain only the lower-case letter, the digits, and the underscore")
- synErrIDInvalidUnderscorePos = newSyntaxError("the underscore cannot be placed at the beginning or end of an identifier")
- synErrIDConsecutiveUnderscores = newSyntaxError("the underscore cannot be placed consecutively")
- synErrIDInvalidDigitsPos = newSyntaxError("the digits cannot be placed at the biginning of an identifier")
- synErrUnclosedTerminal = newSyntaxError("unclosed terminal")
- synErrUnclosedString = newSyntaxError("unclosed string")
- synErrIncompletedEscSeq = newSyntaxError("incompleted escape sequence; unexpected EOF following a backslash")
- synErrEmptyPattern = newSyntaxError("a pattern must include at least one character")
- synErrEmptyString = newSyntaxError("a string must include at least one character")
-
- // syntax errors
- synErrInvalidToken = newSyntaxError("invalid token")
- synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;")
- synErrNoProductionName = newSyntaxError("a production name is missing")
- synErrNoColon = newSyntaxError("the colon must precede alternatives")
- synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative")
- synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol")
- synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @")
- synErrNoDirectiveName = newSyntaxError("a directive needs a name")
- synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing")
- synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )")
- synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal")
- synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier")
- synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier")
- synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline")
- synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element")
-)
diff --git a/src/urubu/spec/grammar/util.go b/src/urubu/spec/grammar/util.go
deleted file mode 100644
index bf3f233..0000000
--- a/src/urubu/spec/grammar/util.go
+++ /dev/null
@@ -1,21 +0,0 @@
-package grammar
-
-import "strings"
-
-var rep = strings.NewReplacer(
- `.`, `\.`,
- `*`, `\*`,
- `+`, `\+`,
- `?`, `\?`,
- `|`, `\|`,
- `(`, `\(`,
- `)`, `\)`,
- `[`, `\[`,
- `\`, `\\`,
-)
-
-// EscapePattern escapes the special characters.
-// For example, EscapePattern(`+`) returns `\+`.
-func EscapePattern(s string) string {
- return rep.Replace(s)
-}
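
EscapePattern stays in use — the new parser.go calls spec.EscapePattern when lowering a string literal to a fragment pattern — so, as a self-contained sketch mirroring the replacer removed above:

    package main

    import (
        "fmt"
        "strings"
    )

    // rep mirrors util.go's replacer: each regex metacharacter gains a
    // backslash. strings.Replacer substitutes in a single left-to-right
    // pass without rescanning its own output, so the `\` rule cannot
    // re-escape the backslashes the other rules just produced.
    var rep = strings.NewReplacer(
        `.`, `\.`, `*`, `\*`, `+`, `\+`, `?`, `\?`,
        `|`, `\|`, `(`, `\(`, `)`, `\)`, `[`, `\[`, `\`, `\\`,
    )

    func main() {
        fmt.Println(rep.Replace(`1+2*(3.4)`)) // 1\+2\*\(3\.4\)
    }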