diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-28 22:31:30 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2022-03-29 01:45:05 +0900 |
commit | ed43562cf58e8c0f9390421848879308fdfc60cb (patch) | |
tree | 16beff7d233b95ae53d2c8019bc47be378f304b8 | |
parent | Simplify the syntax of #ast directive (diff) | |
download | urubu-ed43562cf58e8c0f9390421848879308fdfc60cb.tar.gz urubu-ed43562cf58e8c0f9390421848879308fdfc60cb.tar.xz |
Add label notation
-rw-r--r-- | driver/parser_test.go | 60 | ||||
-rw-r--r-- | grammar/grammar.go | 23 | ||||
-rw-r--r-- | grammar/semantic_error.go | 2 | ||||
-rw-r--r-- | spec/lexer.go | 3 | ||||
-rw-r--r-- | spec/lexer_test.go | 3 | ||||
-rw-r--r-- | spec/lexspec.json | 4 | ||||
-rw-r--r-- | spec/parser.go | 29 | ||||
-rw-r--r-- | spec/parser_test.go | 104 | ||||
-rw-r--r-- | spec/syntax_error.go | 2 | ||||
-rw-r--r-- | spec/vartan_lexer.go | 48 |
10 files changed, 252 insertions, 26 deletions
diff --git a/driver/parser_test.go b/driver/parser_test.go index 3f67bf2..dfc2fe5 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -717,6 +717,66 @@ error: 'error' #skip; `, specErr: true, }, + // A label must be unique in an alternative. + { + specSrc: ` +%name test + +s + : foo@x bar@x + ; + +foo: 'foo'; +bar: 'bar'; +`, + specErr: true, + }, + // The same label can be used between different alternatives. + { + specSrc: ` +%name test + +s + : foo@x bar + | foo@x + ; + +foo: 'foo'; +bar: 'bar'; +`, + src: `foo`, + }, + // A label cannot be the same name as terminal symbols. + { + specSrc: ` +%name test + +s + : foo bar@foo + ; + +foo: 'foo'; +bar: 'bar'; +`, + specErr: true, + }, + // A label cannot be the same name as non-terminal symbols. + { + specSrc: ` +%name test + +s + : foo@a + ; +a + : bar + ; + +foo: 'foo'; +bar: 'bar'; +`, + specErr: true, + }, } classes := []grammar.Class{ diff --git a/grammar/grammar.go b/grammar/grammar.go index 9d1933b..3f1117c 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -677,6 +677,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd LOOP_RHS: for _, alt := range prod.RHS { altSyms := make([]symbol, len(alt.Elements)) + labels := map[string]int{} for i, elem := range alt.Elements { var sym symbol if elem.Pattern != "" { @@ -707,6 +708,28 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd } } altSyms[i] = sym + + if elem.Label != nil { + if _, added := labels[elem.Label.Name]; added { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrDuplicateLabel, + Detail: elem.Label.Name, + Row: elem.Label.Pos.Row, + Col: elem.Label.Pos.Col, + }) + continue LOOP_RHS + } + if _, found := symTab.toSymbol(elem.Label.Name); found { + b.errs = append(b.errs, &verr.SpecError{ + Cause: semErrInvalidLabel, + Detail: elem.Label.Name, + Row: elem.Label.Pos.Row, + Col: elem.Label.Pos.Col, + }) + continue LOOP_RHS + } + labels[elem.Label.Name] = i + } } p, err := newProduction(lhsSym, altSyms) diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go index d540c03..4326d81 100644 --- a/grammar/semantic_error.go +++ b/grammar/semantic_error.go @@ -27,6 +27,8 @@ var ( semErrDuplicateTerminal = newSemanticError("duplicate terminal") semErrDuplicateName = newSemanticError("duplicate names are not allowed between terminals and non-terminals") semErrErrSymIsReserved = newSemanticError("symbol 'error' is reserved as a terminal symbol") + semErrDuplicateLabel = newSemanticError("a label must be unique in an alternative") + semErrInvalidLabel = newSemanticError("a label must differ from terminal symbols or non-terminal symbols") semErrDirInvalidName = newSemanticError("invalid directive name") semErrDirInvalidParam = newSemanticError("invalid parameter") ) diff --git a/spec/lexer.go b/spec/lexer.go index d1cb67d..51791be 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -23,6 +23,7 @@ const ( tokenKindColon = tokenKind(":") tokenKindOr = tokenKind("|") tokenKindSemicolon = tokenKind(";") + tokenKindLabelMarker = tokenKind("@") tokenKindDirectiveMarker = tokenKind("#") tokenKindPosition = tokenKind("$") tokenKindExpantion = tokenKind("...") @@ -269,6 +270,8 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil case KindIDSemicolon: return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil + case KindIDLabelMarker: + return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil case KindIDDirectiveMarker: return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil case KindIDPosition: diff --git a/spec/lexer_test.go b/spec/lexer_test.go index 08939ee..5588d0b 100644 --- a/spec/lexer_test.go +++ b/spec/lexer_test.go @@ -40,7 +40,7 @@ func TestLexer_Run(t *testing.T) { }{ { caption: "the lexer can recognize all kinds of tokens", - src: `id"terminal"'string':|;$1...#%`, + src: `id"terminal"'string':|;@$1...#%`, tokens: []*token{ idTok("id"), termPatTok("terminal"), @@ -48,6 +48,7 @@ func TestLexer_Run(t *testing.T) { symTok(tokenKindColon), symTok(tokenKindOr), symTok(tokenKindSemicolon), + symTok(tokenKindLabelMarker), posTok(1), symTok(tokenKindExpantion), symTok(tokenKindDirectiveMarker), diff --git a/spec/lexspec.json b/spec/lexspec.json index a60624e..838763b 100644 --- a/spec/lexspec.json +++ b/spec/lexspec.json @@ -106,6 +106,10 @@ "pattern": ";" }, { + "kind": "label_marker", + "pattern": "@" + }, + { "kind": "position", "pattern": "$(0|[1-9][0-9]*)" }, diff --git a/spec/parser.go b/spec/parser.go index a453538..a9e4d62 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -38,10 +38,16 @@ type AlternativeNode struct { type ElementNode struct { ID string Pattern string + Label *LabelNode Literally bool Pos Position } +type LabelNode struct { + Name string + Pos Position +} + type DirectiveNode struct { Name string Parameters []*ParameterNode @@ -383,25 +389,40 @@ func (p *parser) parseAlternative() *AlternativeNode { } func (p *parser) parseElement() *ElementNode { + var elem *ElementNode switch { case p.consume(tokenKindID): - return &ElementNode{ + elem = &ElementNode{ ID: p.lastTok.text, Pos: p.lastTok.pos, } case p.consume(tokenKindTerminalPattern): - return &ElementNode{ + elem = &ElementNode{ Pattern: p.lastTok.text, Pos: p.lastTok.pos, } case p.consume(tokenKindStringLiteral): - return &ElementNode{ + elem = &ElementNode{ Pattern: p.lastTok.text, Literally: true, Pos: p.lastTok.pos, } + default: + if p.consume(tokenKindLabelMarker) { + raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol) + } + return nil + } + if p.consume(tokenKindLabelMarker) { + if !p.consume(tokenKindID) { + raiseSyntaxError(p.pos.Row, synErrNoLabel) + } + elem.Label = &LabelNode{ + Name: p.lastTok.text, + Pos: p.lastTok.pos, + } } - return nil + return elem } func (p *parser) parseDirective() *DirectiveNode { diff --git a/spec/parser_test.go b/spec/parser_test.go index 154aea0..e81f95e 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -91,6 +91,19 @@ func TestParse(t *testing.T) { Pattern: p, } } + label := func(name string) *LabelNode { + return &LabelNode{ + Name: name, + } + } + withLabelPos := func(label *LabelNode, pos Position) *LabelNode { + label.Pos = pos + return label + } + withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode { + elem.Label = label + return elem + } withElemPos := func(elem *ElementNode, pos Position) *ElementNode { elem.Pos = pos return elem @@ -536,6 +549,97 @@ fragment number: "[0-9]"; }, }, { + caption: "a symbol can have a label", + src: ` +expr + : term@lhs add term@rhs + ; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + prod("expr", + withAltPos( + alt( + withElemPos( + withLabel( + id("term"), + withLabelPos( + label("lhs"), + newPos(3), + ), + ), + newPos(3), + ), + withElemPos( + id("add"), + newPos(3), + ), + withElemPos( + withLabel( + id("term"), + withLabelPos( + label("rhs"), + newPos(3), + ), + ), + newPos(3), + ), + ), + newPos(3), + ), + ), + newPos(2), + ), + }, + }, + }, + { + caption: "a label must be an identifier, not a string", + src: ` +foo + : bar@'baz' + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "a label must be an identifier, not a pattern", + src: ` +foo + : bar@"baz" + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "the symbol marker @ must be followed by an identifier", + src: ` +foo + : bar@ + ; +`, + synErr: synErrNoLabel, + }, + { + caption: "a symbol cannot have more than or equal to two labels", + src: ` +foo + : bar@baz@bra + ; +`, + synErr: synErrLabelWithNoSymbol, + }, + { + caption: "a label must follow a symbol", + src: ` +foo + : @baz + ; +`, + synErr: synErrLabelWithNoSymbol, + }, + { caption: "a grammar can contain left and right associativities", src: ` %left l1 l2 diff --git a/spec/syntax_error.go b/spec/syntax_error.go index ae65b35..741d578 100644 --- a/spec/syntax_error.go +++ b/spec/syntax_error.go @@ -29,6 +29,8 @@ var ( synErrNoProductionName = newSyntaxError("a production name is missing") synErrNoColon = newSyntaxError("the colon must precede alternatives") synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative") + synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol") + synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @") synErrNoDirectiveName = newSyntaxError("a directive needs a name") synErrProdDirNoNewline = newSyntaxError("a production directive must be followed by a newline") synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go index 063e429..3042243 100644 --- a/spec/vartan_lexer.go +++ b/spec/vartan_lexer.go @@ -353,17 +353,18 @@ const ( KindIDColon KindID = 8 KindIDOr KindID = 9 KindIDSemicolon KindID = 10 - KindIDPosition KindID = 11 - KindIDExpansion KindID = 12 - KindIDDirectiveMarker KindID = 13 - KindIDMetadataMarker KindID = 14 - KindIDPattern KindID = 15 - KindIDTerminalClose KindID = 16 - KindIDEscapeSymbol KindID = 17 - KindIDCharSeq KindID = 18 - KindIDEscapedQuot KindID = 19 - KindIDEscapedBackSlash KindID = 20 - KindIDStringLiteralClose KindID = 21 + KindIDLabelMarker KindID = 11 + KindIDPosition KindID = 12 + KindIDExpansion KindID = 13 + KindIDDirectiveMarker KindID = 14 + KindIDMetadataMarker KindID = 15 + KindIDPattern KindID = 16 + KindIDTerminalClose KindID = 17 + KindIDEscapeSymbol KindID = 18 + KindIDCharSeq KindID = 19 + KindIDEscapedQuot KindID = 20 + KindIDEscapedBackSlash KindID = 21 + KindIDStringLiteralClose KindID = 22 ) const ( @@ -378,6 +379,7 @@ const ( KindNameColon = "colon" KindNameOr = "or" KindNameSemicolon = "semicolon" + KindNameLabelMarker = "label_marker" KindNamePosition = "position" KindNameExpansion = "expansion" KindNameDirectiveMarker = "directive_marker" @@ -416,6 +418,8 @@ func KindIDToName(id KindID) string { return KindNameOr case KindIDSemicolon: return KindNameSemicolon + case KindIDLabelMarker: + return KindNameLabelMarker case KindIDPosition: return KindNamePosition case KindIDExpansion: @@ -467,7 +471,7 @@ func NewLexSpec() *lexSpec { pop: [][]bool{ nil, { - false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, }, { false, false, true, false, @@ -479,7 +483,7 @@ func NewLexSpec() *lexSpec { push: [][]ModeID{ nil, { - 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 0, 0, 0, @@ -504,8 +508,8 @@ func NewLexSpec() *lexSpec { nil, { 0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 4, 5, 0, 11, 0, 0, 2, 6, 7, - 8, 9, 10, 11, 12, 13, 14, + 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 4, 5, 0, 12, 0, 0, 2, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -533,6 +537,7 @@ func NewLexSpec() *lexSpec { KindIDColon, KindIDOr, KindIDSemicolon, + KindIDLabelMarker, KindIDPosition, KindIDExpansion, KindIDDirectiveMarker, @@ -565,6 +570,7 @@ func NewLexSpec() *lexSpec { KindNameColon, KindNameOr, KindNameSemicolon, + KindNameLabelMarker, KindNamePosition, KindNameExpansion, KindNameDirectiveMarker, @@ -587,7 +593,7 @@ func NewLexSpec() *lexSpec { { 0, 1, 2, 3, 4, 5, 6, 7, 6, 8, 6, 9, 6, 10, 6, 11, 12, 6, 13, 14, 6, 15, 16, 6, 17, 18, 19, 20, 21, 22, 23, 24, 24, 25, 26, 27, 28, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, }, { 0, 1, 2, 3, 2, 4, 2, 5, 2, 6, 2, 7, 8, 2, 9, 10, 2, 11, 12, 2, @@ -635,7 +641,7 @@ func NewLexSpec() *lexSpec { 5, 5, 5, 5, 5, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, -1, 1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1, - -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, -1, -1, -1, -1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, @@ -979,9 +985,9 @@ func NewLexSpec() *lexSpec { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 13, 13, 15, 18, 18, 18, 21, 2, 37, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 38, 45, 33, 46, 0, 39, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 38, 46, 33, 47, 0, 39, 0, 0, 0, 0, 0, 0, 35, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 42, 0, 0, 0, 0, - 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, + 43, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 32, 0, 32, 32, 32, 32, 32, 24, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0, 41, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -1044,10 +1050,10 @@ func NewLexSpec() *lexSpec { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 22, 43, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, + 22, 22, 22, 44, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 2, 0, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 5, 36, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 5, 36, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |