diff options
Diffstat (limited to 'spec')
-rw-r--r-- | spec/lexer.go | 9 | ||||
-rw-r--r-- | spec/lexer_test.go | 5 | ||||
-rw-r--r-- | spec/lexspec.json | 8 | ||||
-rw-r--r-- | spec/parser.go | 69 | ||||
-rw-r--r-- | spec/parser_test.go | 169 | ||||
-rw-r--r-- | spec/syntax_error.go | 21 | ||||
-rw-r--r-- | spec/vartan_lexer.go | 44 |
7 files changed, 230 insertions, 95 deletions
diff --git a/spec/lexer.go b/spec/lexer.go index f9ad871..2459c40 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -25,7 +25,8 @@ const ( tokenKindLabelMarker = tokenKind("@") tokenKindDirectiveMarker = tokenKind("#") tokenKindExpantion = tokenKind("...") - tokenKindMetaDataMarker = tokenKind("%") + tokenKindLParen = tokenKind("(") + tokenKindRParen = tokenKind(")") tokenKindNewline = tokenKind("newline") tokenKindEOF = tokenKind("eof") tokenKindInvalid = tokenKind("invalid") @@ -265,8 +266,10 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil case KindIDExpansion: return newSymbolToken(tokenKindExpantion, newPosition(tok.Row+1, tok.Col+1)), nil - case KindIDMetadataMarker: - return newSymbolToken(tokenKindMetaDataMarker, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDLParen: + return newSymbolToken(tokenKindLParen, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDRParen: + return newSymbolToken(tokenKindRParen, newPosition(tok.Row+1, tok.Col+1)), nil default: return newInvalidToken(string(tok.Lexeme), newPosition(tok.Row+1, tok.Col+1)), nil } diff --git a/spec/lexer_test.go b/spec/lexer_test.go index 4fab8db..621eff5 100644 --- a/spec/lexer_test.go +++ b/spec/lexer_test.go @@ -36,7 +36,7 @@ func TestLexer_Run(t *testing.T) { }{ { caption: "the lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;@...#%`, + src: `id"terminal"'string':|;@...#()`, tokens: []*token{ idTok("id"), termPatTok("terminal"), @@ -47,7 +47,8 @@ func TestLexer_Run(t *testing.T) { symTok(tokenKindLabelMarker), symTok(tokenKindExpantion), symTok(tokenKindDirectiveMarker), - symTok(tokenKindMetaDataMarker), + symTok(tokenKindLParen), + symTok(tokenKindRParen), newEOFToken(), }, }, diff --git a/spec/lexspec.json b/spec/lexspec.json index ff8ff0d..6a11a4a 100644 --- a/spec/lexspec.json +++ b/spec/lexspec.json @@ -118,8 +118,12 @@ "pattern": "#" }, { - "kind": "metadata_marker", - "pattern": "%" + "kind": "l_paren", + "pattern": "\\(" + }, + { + "kind": "r_paren", + "pattern": "\\)" } ] } diff --git a/spec/parser.go b/spec/parser.go index a1d23f0..3b5907e 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -9,7 +9,7 @@ import ( ) type RootNode struct { - MetaData []*DirectiveNode + Directives []*DirectiveNode Productions []*ProductionNode LexProductions []*ProductionNode Fragments []*FragmentNode @@ -58,6 +58,7 @@ type ParameterNode struct { ID string Pattern string String string + Group []*DirectiveNode Expansion bool Pos Position } @@ -134,14 +135,14 @@ func (p *parser) parseRoot() *RootNode { } }() - var metadata []*DirectiveNode + var dirs []*DirectiveNode var prods []*ProductionNode var lexProds []*ProductionNode var fragments []*FragmentNode for { - md := p.parseMetaData() - if md != nil { - metadata = append(metadata, md) + dir := p.parseTopLevelDirective() + if dir != nil { + dirs = append(dirs, dir) continue } @@ -167,14 +168,14 @@ func (p *parser) parseRoot() *RootNode { } return &RootNode{ - MetaData: metadata, + Directives: dirs, Productions: prods, LexProductions: lexProds, Fragments: fragments, } } -func (p *parser) parseMetaData() *DirectiveNode { +func (p *parser) parseTopLevelDirective() *DirectiveNode { defer func() { err := recover() if err == nil { @@ -187,35 +188,21 @@ func (p *parser) parseMetaData() *DirectiveNode { } p.errs = append(p.errs, specErr) - p.skipOverTo(tokenKindNewline) + p.skipOverTo(tokenKindSemicolon) }() - p.consume(tokenKindNewline) - - if !p.consume(tokenKindMetaDataMarker) { + dir := p.parseDirective() + if dir == nil { return nil } - mdPos := p.lastTok.pos - if !p.consume(tokenKindID) { - raiseSyntaxError(p.pos.Row, synErrNoMDName) - } - name := p.lastTok.text + p.consume(tokenKindNewline) - var params []*ParameterNode - for { - param := p.parseParameter() - if param == nil { - break - } - params = append(params, param) + if !p.consume(tokenKindSemicolon) { + raiseSyntaxError(p.pos.Row, synErrTopLevelDirNoSemicolon) } - return &DirectiveNode{ - Name: name, - Parameters: params, - Pos: mdPos, - } + return dir } func (p *parser) parseFragment() *FragmentNode { @@ -428,6 +415,8 @@ func (p *parser) parseElement() *ElementNode { } func (p *parser) parseDirective() *DirectiveNode { + p.consume(tokenKindNewline) + if !p.consume(tokenKindDirectiveMarker) { return nil } @@ -472,6 +461,30 @@ func (p *parser) parseParameter() *ParameterNode { String: p.lastTok.text, Pos: p.lastTok.pos, } + case p.consume(tokenKindLParen): + pos := p.lastTok.pos + var g []*DirectiveNode + for { + dir := p.parseDirective() + if dir == nil { + break + } + g = append(g, dir) + } + if !p.consume(tokenKindRParen) { + raiseSyntaxError(p.pos.Row, synErrUnclosedDirGroup) + } + if len(g) == 0 { + // Set an empty slice representing an empty directive group to distinguish between the following two cases. + // + // - #prec (); // vartan allows this case. + // - #prec; // This case will raise an error. + g = []*DirectiveNode{} + } + param = &ParameterNode{ + Group: g, + Pos: pos, + } default: return nil } diff --git a/spec/parser_test.go b/spec/parser_test.go index 2a44acd..de2c6f7 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -8,6 +8,20 @@ import ( ) func TestParse(t *testing.T) { + name := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "name", + Parameters: []*ParameterNode{param}, + } + } + + prec := func(param *ParameterNode) *DirectiveNode { + return &DirectiveNode{ + Name: "prec", + Parameters: []*ParameterNode{param}, + } + } + leftAssoc := func(params ...*ParameterNode) *DirectiveNode { return &DirectiveNode{ Name: "left", @@ -66,6 +80,11 @@ func TestParse(t *testing.T) { param.Expansion = true return param } + group := func(dirs ...*DirectiveNode) *ParameterNode { + return &ParameterNode{ + Group: dirs, + } + } withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { param.Pos = pos return param @@ -122,6 +141,82 @@ func TestParse(t *testing.T) { synErr *SyntaxError }{ { + caption: "a grammar can contain top-level directives", + src: ` +#name test; + +#prec ( + #left a b + #right c d +); +`, + ast: &RootNode{ + Directives: []*DirectiveNode{ + withDirPos( + name( + withParamPos( + idParam("test"), + newPos(2), + ), + ), + newPos(2), + ), + withDirPos( + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos( + idParam("a"), + newPos(5), + ), + withParamPos( + idParam("b"), + newPos(5), + ), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos( + idParam("c"), + newPos(6), + ), + withParamPos( + idParam("d"), + newPos(6), + ), + ), + newPos(6), + ), + ), + newPos(4), + ), + ), + newPos(4), + ), + }, + }, + }, + { + caption: "a top-level directive must be followed by ';'", + src: ` +#name test +`, + synErr: synErrTopLevelDirNoSemicolon, + }, + { + caption: "a directive group must be closed by ')'", + src: ` +#prec ( + #left a b +; +`, + synErr: synErrUnclosedDirGroup, + }, + { caption: "single production is a valid grammar", src: `a: "a";`, ast: &RootNode{ @@ -643,10 +738,12 @@ foo { caption: "a grammar can contain left and right associativities", src: ` -%left l1 l2 -%left l3 -%right r1 r2 -%right r3 +#prec ( + #left l1 l2 + #left l3 + #right r1 r2 + #right r3 +); s : id l1 id l2 id l3 id @@ -671,33 +768,43 @@ id : "[A-Za-z0-9_]+"; `, ast: &RootNode{ - MetaData: []*DirectiveNode{ + Directives: []*DirectiveNode{ withDirPos( - leftAssoc( - withParamPos(idParam("l1"), newPos(2)), - withParamPos(idParam("l2"), newPos(2)), + prec( + withParamPos( + group( + withDirPos( + leftAssoc( + withParamPos(idParam("l1"), newPos(3)), + withParamPos(idParam("l2"), newPos(3)), + ), + newPos(3), + ), + withDirPos( + leftAssoc( + withParamPos(idParam("l3"), newPos(4)), + ), + newPos(4), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r1"), newPos(5)), + withParamPos(idParam("r2"), newPos(5)), + ), + newPos(5), + ), + withDirPos( + rightAssoc( + withParamPos(idParam("r3"), newPos(6)), + ), + newPos(6), + ), + ), + newPos(2), + ), ), newPos(2), ), - withDirPos( - leftAssoc( - withParamPos(idParam("l3"), newPos(3)), - ), - newPos(3), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r1"), newPos(4)), - withParamPos(idParam("r2"), newPos(4)), - ), - newPos(4), - ), - withDirPos( - rightAssoc( - withParamPos(idParam("r3"), newPos(5)), - ), - newPos(5), - ), }, Productions: []*ProductionNode{ prod("s", @@ -756,11 +863,11 @@ func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { if len(root.Productions) != len(expected.Productions) { t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) } - if len(root.MetaData) != len(expected.MetaData) { - t.Fatalf("unexpected length of meta data; want: %v, got: %v", len(expected.MetaData), len(root.MetaData)) + if len(root.Directives) != len(expected.Directives) { + t.Fatalf("unexpected length of top-level directives; want: %v, got: %v", len(expected.Directives), len(root.Directives)) } - for i, md := range root.MetaData { - testDirectives(t, []*DirectiveNode{md}, []*DirectiveNode{expected.MetaData[i]}, true) + for i, dir := range root.Directives { + testDirectives(t, []*DirectiveNode{dir}, []*DirectiveNode{expected.Directives[i]}, true) } for i, prod := range root.Productions { testProductionNode(t, prod, expected.Productions[i], checkPosition) diff --git a/spec/syntax_error.go b/spec/syntax_error.go index fdf9c40..3b44d2d 100644 --- a/spec/syntax_error.go +++ b/spec/syntax_error.go @@ -24,14 +24,15 @@ var ( synErrEmptyString = newSyntaxError("a string must include at least one character") // syntax errors - synErrInvalidToken = newSyntaxError("invalid token") - synErrNoMDName = newSyntaxError("a metadata name is missing") - synErrNoProductionName = newSyntaxError("a production name is missing") - synErrNoColon = newSyntaxError("the colon must precede alternatives") - synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") - synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") - synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @") - synErrNoDirectiveName = newSyntaxError("a directive needs a name") - synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") - synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element") + synErrInvalidToken = newSyntaxError("invalid token") + synErrTopLevelDirNoSemicolon = newSyntaxError("a top-level directive must be followed by ;") + synErrNoProductionName = newSyntaxError("a production name is missing") + synErrNoColon = newSyntaxError("the colon must precede alternatives") + synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") + synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") + synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @") + synErrNoDirectiveName = newSyntaxError("a directive needs a name") + synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )") + synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") + synErrFragmentNoPattern = newSyntaxError("a fragment needs one pattern element") ) diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go index 8c9e7ad..146748a 100644 --- a/spec/vartan_lexer.go +++ b/spec/vartan_lexer.go @@ -356,14 +356,15 @@ const ( KindIDLabelMarker KindID = 11 KindIDExpansion KindID = 12 KindIDDirectiveMarker KindID = 13 - KindIDMetadataMarker KindID = 14 - KindIDPattern KindID = 15 - KindIDTerminalClose KindID = 16 - KindIDEscapeSymbol KindID = 17 - KindIDCharSeq KindID = 18 - KindIDEscapedQuot KindID = 19 - KindIDEscapedBackSlash KindID = 20 - KindIDStringLiteralClose KindID = 21 + KindIDLParen KindID = 14 + KindIDRParen KindID = 15 + KindIDPattern KindID = 16 + KindIDTerminalClose KindID = 17 + KindIDEscapeSymbol KindID = 18 + KindIDCharSeq KindID = 19 + KindIDEscapedQuot KindID = 20 + KindIDEscapedBackSlash KindID = 21 + KindIDStringLiteralClose KindID = 22 ) const ( @@ -381,7 +382,8 @@ const ( KindNameLabelMarker = "label_marker" KindNameExpansion = "expansion" KindNameDirectiveMarker = "directive_marker" - KindNameMetadataMarker = "metadata_marker" + KindNameLParen = "l_paren" + KindNameRParen = "r_paren" KindNamePattern = "pattern" KindNameTerminalClose = "terminal_close" KindNameEscapeSymbol = "escape_symbol" @@ -422,8 +424,10 @@ func KindIDToName(id KindID) string { return KindNameExpansion case KindIDDirectiveMarker: return KindNameDirectiveMarker - case KindIDMetadataMarker: - return KindNameMetadataMarker + case KindIDLParen: + return KindNameLParen + case KindIDRParen: + return KindNameRParen case KindIDPattern: return KindNamePattern case KindIDTerminalClose: @@ -467,7 +471,7 @@ func NewLexSpec() *lexSpec { pop: [][]bool{ nil, { - false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, }, { false, false, true, false, @@ -479,7 +483,7 @@ func NewLexSpec() *lexSpec { push: [][]ModeID{ nil, { - 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, @@ -505,7 +509,7 @@ func NewLexSpec() *lexSpec { { 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 4, 5, 0, 0, 2, 6, 7, 8, 9, - 10, 11, 12, 13, 14, + 10, 11, 12, 13, 14, 15, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -536,7 +540,8 @@ func NewLexSpec() *lexSpec { KindIDLabelMarker, KindIDExpansion, KindIDDirectiveMarker, - KindIDMetadataMarker, + KindIDLParen, + KindIDRParen, }, { KindIDNil, @@ -568,7 +573,8 @@ func NewLexSpec() *lexSpec { KindNameLabelMarker, KindNameExpansion, KindNameDirectiveMarker, - KindNameMetadataMarker, + KindNameLParen, + KindNameRParen, KindNamePattern, KindNameTerminalClose, KindNameEscapeSymbol, @@ -587,7 +593,7 @@ func NewLexSpec() *lexSpec { { 0, 1, 2, 3, 4, 5, 6, 7, 6, 8, 6, 9, 6, 10, 6, 11, 12, 6, 13, 14, 6, 15, 16, 6, 17, 18, 19, 20, 21, 22, 23, 24, 24, 25, 26, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, }, { 0, 1, 2, 3, 2, 4, 2, 5, 2, 6, 2, 7, 8, 2, 9, 10, 2, 11, 12, 2, @@ -633,7 +639,7 @@ func NewLexSpec() *lexSpec { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, -1, 1, -1, 1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, -1, -1, -1, 1, 1, 1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1, @@ -978,7 +984,7 @@ func NewLexSpec() *lexSpec { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 13, 13, 15, 18, 18, 18, 21, 2, 35, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 36, 43, 0, 44, 0, 37, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 36, 43, 0, 0, 0, 37, 44, 45, 0, 0, 0, 0, 33, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38, 40, 0, 0, 0, 0, 41, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 32, 0, 32, 32, 32, 32, 32, 24, 32, |