aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ryo Nihei <nihei.dev@gmail.com> 2022-03-28 22:31:30 +0900
committer Ryo Nihei <nihei.dev@gmail.com> 2022-03-29 01:45:05 +0900
commit ed43562cf58e8c0f9390421848879308fdfc60cb (patch)
tree 16beff7d233b95ae53d2c8019bc47be378f304b8
parent Simplify the syntax of #ast directive (diff)
download urubu-ed43562cf58e8c0f9390421848879308fdfc60cb.tar.gz
urubu-ed43562cf58e8c0f9390421848879308fdfc60cb.tar.xz
Add label notation
-rw-r--r-- driver/parser_test.go 60
-rw-r--r-- grammar/grammar.go 23
-rw-r--r-- grammar/semantic_error.go 2
-rw-r--r-- spec/lexer.go 3
-rw-r--r-- spec/lexer_test.go 3
-rw-r--r-- spec/lexspec.json 4
-rw-r--r-- spec/parser.go 29
-rw-r--r-- spec/parser_test.go 104
-rw-r--r-- spec/syntax_error.go 2
-rw-r--r-- spec/vartan_lexer.go 48
10 files changed, 252 insertions, 26 deletions
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 3f67bf2..dfc2fe5 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -717,6 +717,66 @@ error: 'error' #skip;
`,
specErr: true,
},
+ // A label must be unique in an alternative.
+ {
+ specSrc: `
+%name test
+
+s
+ : foo@x bar@x
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ specErr: true,
+ },
+ // The same label can be used between different alternatives.
+ {
+ specSrc: `
+%name test
+
+s
+ : foo@x bar
+ | foo@x
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ src: `foo`,
+ },
+ // A label cannot be the same name as terminal symbols.
+ {
+ specSrc: `
+%name test
+
+s
+ : foo bar@foo
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ specErr: true,
+ },
+ // A label cannot be the same name as non-terminal symbols.
+ {
+ specSrc: `
+%name test
+
+s
+ : foo@a
+ ;
+a
+ : bar
+ ;
+
+foo: 'foo';
+bar: 'bar';
+`,
+ specErr: true,
+ },
}
classes := []grammar.Class{
diff --git a/grammar/grammar.go b/grammar/grammar.go
index 9d1933b..3f1117c 100644
--- a/grammar/grammar.go
+++ b/grammar/grammar.go
@@ -677,6 +677,7 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
LOOP_RHS:
for _, alt := range prod.RHS {
altSyms := make([]symbol, len(alt.Elements))
+ labels := map[string]int{}
for i, elem := range alt.Elements {
var sym symbol
if elem.Pattern != "" {
@@ -707,6 +708,28 @@ func (b *GrammarBuilder) genProductionsAndActions(root *spec.RootNode, symTabAnd
}
}
altSyms[i] = sym
+
+ if elem.Label != nil {
+ if _, added := labels[elem.Label.Name]; added {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrDuplicateLabel,
+ Detail: elem.Label.Name,
+ Row: elem.Label.Pos.Row,
+ Col: elem.Label.Pos.Col,
+ })
+ continue LOOP_RHS
+ }
+ if _, found := symTab.toSymbol(elem.Label.Name); found {
+ b.errs = append(b.errs, &verr.SpecError{
+ Cause: semErrInvalidLabel,
+ Detail: elem.Label.Name,
+ Row: elem.Label.Pos.Row,
+ Col: elem.Label.Pos.Col,
+ })
+ continue LOOP_RHS
+ }
+ labels[elem.Label.Name] = i
+ }
}
p, err := newProduction(lhsSym, altSyms)
diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go
index d540c03..4326d81 100644
--- a/grammar/semantic_error.go
+++ b/grammar/semantic_error.go
@@ -27,6 +27,8 @@ var (
semErrDuplicateTerminal = newSemanticError("duplicate terminal")
semErrDuplicateName = newSemanticError("duplicate names are not allowed between terminals and non-terminals")
semErrErrSymIsReserved = newSemanticError("symbol 'error' is reserved as a terminal symbol")
+ semErrDuplicateLabel = newSemanticError("a label must be unique in an alternative")
+ semErrInvalidLabel = newSemanticError("a label must differ from terminal symbols or non-terminal symbols")
semErrDirInvalidName = newSemanticError("invalid directive name")
semErrDirInvalidParam = newSemanticError("invalid parameter")
)
diff --git a/spec/lexer.go b/spec/lexer.go
index d1cb67d..51791be 100644
--- a/spec/lexer.go
+++ b/spec/lexer.go
@@ -23,6 +23,7 @@ const (
tokenKindColon = tokenKind(":")
tokenKindOr = tokenKind("|")
tokenKindSemicolon = tokenKind(";")
+ tokenKindLabelMarker = tokenKind("@")
tokenKindDirectiveMarker = tokenKind("#")
tokenKindPosition = tokenKind("$")
tokenKindExpantion = tokenKind("...")
@@ -269,6 +270,8 @@ func (l *lexer) lexAndSkipWSs() (*token, error) {
return newSymbolToken(tokenKindOr, newPosition(tok.Row+1, tok.Col+1)), nil
case KindIDSemicolon:
return newSymbolToken(tokenKindSemicolon, newPosition(tok.Row+1, tok.Col+1)), nil
+ case KindIDLabelMarker:
+ return newSymbolToken(tokenKindLabelMarker, newPosition(tok.Row+1, tok.Col+1)), nil
case KindIDDirectiveMarker:
return newSymbolToken(tokenKindDirectiveMarker, newPosition(tok.Row+1, tok.Col+1)), nil
case KindIDPosition:
diff --git a/spec/lexer_test.go b/spec/lexer_test.go
index 08939ee..5588d0b 100644
--- a/spec/lexer_test.go
+++ b/spec/lexer_test.go
@@ -40,7 +40,7 @@ func TestLexer_Run(t *testing.T) {
}{
{
caption: "the lexer can recognize all kinds of tokens",
- src: `id"terminal"'string':|;$1...#%`,
+ src: `id"terminal"'string':|;@$1...#%`,
tokens: []*token{
idTok("id"),
termPatTok("terminal"),
@@ -48,6 +48,7 @@ func TestLexer_Run(t *testing.T) {
symTok(tokenKindColon),
symTok(tokenKindOr),
symTok(tokenKindSemicolon),
+ symTok(tokenKindLabelMarker),
posTok(1),
symTok(tokenKindExpantion),
symTok(tokenKindDirectiveMarker),
diff --git a/spec/lexspec.json b/spec/lexspec.json
index a60624e..838763b 100644
--- a/spec/lexspec.json
+++ b/spec/lexspec.json
@@ -106,6 +106,10 @@
"pattern": ";"
},
{
+ "kind": "label_marker",
+ "pattern": "@"
+ },
+ {
"kind": "position",
"pattern": "$(0|[1-9][0-9]*)"
},
diff --git a/spec/parser.go b/spec/parser.go
index a453538..a9e4d62 100644
--- a/spec/parser.go
+++ b/spec/parser.go
@@ -38,10 +38,16 @@ type AlternativeNode struct {
type ElementNode struct {
ID string
Pattern string
+ Label *LabelNode
Literally bool
Pos Position
}
+type LabelNode struct {
+ Name string
+ Pos Position
+}
+
type DirectiveNode struct {
Name string
Parameters []*ParameterNode
@@ -383,25 +389,40 @@ func (p *parser) parseAlternative() *AlternativeNode {
}
func (p *parser) parseElement() *ElementNode {
+ var elem *ElementNode
switch {
case p.consume(tokenKindID):
- return &ElementNode{
+ elem = &ElementNode{
ID: p.lastTok.text,
Pos: p.lastTok.pos,
}
case p.consume(tokenKindTerminalPattern):
- return &ElementNode{
+ elem = &ElementNode{
Pattern: p.lastTok.text,
Pos: p.lastTok.pos,
}
case p.consume(tokenKindStringLiteral):
- return &ElementNode{
+ elem = &ElementNode{
Pattern: p.lastTok.text,
Literally: true,
Pos: p.lastTok.pos,
}
+ default:
+ if p.consume(tokenKindLabelMarker) {
+ raiseSyntaxError(p.pos.Row, synErrLabelWithNoSymbol)
+ }
+ return nil
+ }
+ if p.consume(tokenKindLabelMarker) {
+ if !p.consume(tokenKindID) {
+ raiseSyntaxError(p.pos.Row, synErrNoLabel)
+ }
+ elem.Label = &LabelNode{
+ Name: p.lastTok.text,
+ Pos: p.lastTok.pos,
+ }
}
- return nil
+ return elem
}
func (p *parser) parseDirective() *DirectiveNode {
diff --git a/spec/parser_test.go b/spec/parser_test.go
index 154aea0..e81f95e 100644
--- a/spec/parser_test.go
+++ b/spec/parser_test.go
@@ -91,6 +91,19 @@ func TestParse(t *testing.T) {
Pattern: p,
}
}
+ label := func(name string) *LabelNode {
+ return &LabelNode{
+ Name: name,
+ }
+ }
+ withLabelPos := func(label *LabelNode, pos Position) *LabelNode {
+ label.Pos = pos
+ return label
+ }
+ withLabel := func(elem *ElementNode, label *LabelNode) *ElementNode {
+ elem.Label = label
+ return elem
+ }
withElemPos := func(elem *ElementNode, pos Position) *ElementNode {
elem.Pos = pos
return elem
@@ -536,6 +549,97 @@ fragment number: "[0-9]";
},
},
{
+ caption: "a symbol can have a label",
+ src: `
+expr
+ : term@lhs add term@rhs
+ ;
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ withProdPos(
+ prod("expr",
+ withAltPos(
+ alt(
+ withElemPos(
+ withLabel(
+ id("term"),
+ withLabelPos(
+ label("lhs"),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ withElemPos(
+ id("add"),
+ newPos(3),
+ ),
+ withElemPos(
+ withLabel(
+ id("term"),
+ withLabelPos(
+ label("rhs"),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ ),
+ newPos(3),
+ ),
+ ),
+ newPos(2),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a label must be an identifier, not a string",
+ src: `
+foo
+ : bar@'baz'
+ ;
+`,
+ synErr: synErrNoLabel,
+ },
+ {
+ caption: "a label must be an identifier, not a pattern",
+ src: `
+foo
+ : bar@"baz"
+ ;
+`,
+ synErr: synErrNoLabel,
+ },
+ {
+ caption: "the symbol marker @ must be followed by an identifier",
+ src: `
+foo
+ : bar@
+ ;
+`,
+ synErr: synErrNoLabel,
+ },
+ {
+ caption: "a symbol cannot have more than or equal to two labels",
+ src: `
+foo
+ : bar@baz@bra
+ ;
+`,
+ synErr: synErrLabelWithNoSymbol,
+ },
+ {
+ caption: "a label must follow a symbol",
+ src: `
+foo
+ : @baz
+ ;
+`,
+ synErr: synErrLabelWithNoSymbol,
+ },
+ {
caption: "a grammar can contain left and right associativities",
src: `
%left l1 l2
diff --git a/spec/syntax_error.go b/spec/syntax_error.go
index ae65b35..741d578 100644
--- a/spec/syntax_error.go
+++ b/spec/syntax_error.go
@@ -29,6 +29,8 @@ var (
synErrNoProductionName = newSyntaxError("a production name is missing")
synErrNoColon = newSyntaxError("the colon must precede alternatives")
synErrNoSemicolon = newSyntaxError("the semicolon is missing at the last of an alternative")
+ synErrLabelWithNoSymbol = newSyntaxError("a label must follow a symbol")
+ synErrNoLabel = newSyntaxError("an identifier that represents a label is missing after the label marker @")
synErrNoDirectiveName = newSyntaxError("a directive needs a name")
synErrProdDirNoNewline = newSyntaxError("a production directive must be followed by a newline")
synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline")
diff --git a/spec/vartan_lexer.go b/spec/vartan_lexer.go
index 063e429..3042243 100644
--- a/spec/vartan_lexer.go
+++ b/spec/vartan_lexer.go
@@ -353,17 +353,18 @@ const (
KindIDColon KindID = 8
KindIDOr KindID = 9
KindIDSemicolon KindID = 10
- KindIDPosition KindID = 11
- KindIDExpansion KindID = 12
- KindIDDirectiveMarker KindID = 13
- KindIDMetadataMarker KindID = 14
- KindIDPattern KindID = 15
- KindIDTerminalClose KindID = 16
- KindIDEscapeSymbol KindID = 17
- KindIDCharSeq KindID = 18
- KindIDEscapedQuot KindID = 19
- KindIDEscapedBackSlash KindID = 20
- KindIDStringLiteralClose KindID = 21
+ KindIDLabelMarker KindID = 11
+ KindIDPosition KindID = 12
+ KindIDExpansion KindID = 13
+ KindIDDirectiveMarker KindID = 14
+ KindIDMetadataMarker KindID = 15
+ KindIDPattern KindID = 16
+ KindIDTerminalClose KindID = 17
+ KindIDEscapeSymbol KindID = 18
+ KindIDCharSeq KindID = 19
+ KindIDEscapedQuot KindID = 20
+ KindIDEscapedBackSlash KindID = 21
+ KindIDStringLiteralClose KindID = 22
)
const (
@@ -378,6 +379,7 @@ const (
KindNameColon = "colon"
KindNameOr = "or"
KindNameSemicolon = "semicolon"
+ KindNameLabelMarker = "label_marker"
KindNamePosition = "position"
KindNameExpansion = "expansion"
KindNameDirectiveMarker = "directive_marker"
@@ -416,6 +418,8 @@ func KindIDToName(id KindID) string {
return KindNameOr
case KindIDSemicolon:
return KindNameSemicolon
+ case KindIDLabelMarker:
+ return KindNameLabelMarker
case KindIDPosition:
return KindNamePosition
case KindIDExpansion:
@@ -467,7 +471,7 @@ func NewLexSpec() *lexSpec {
pop: [][]bool{
nil,
{
- false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
+ false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false,
},
{
false, false, true, false,
@@ -479,7 +483,7 @@ func NewLexSpec() *lexSpec {
push: [][]ModeID{
nil,
{
- 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0,
},
{
0, 0, 0, 0,
@@ -504,8 +508,8 @@ func NewLexSpec() *lexSpec {
nil,
{
0, 0, 1, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 4, 5, 0, 11, 0, 0, 2, 6, 7,
- 8, 9, 10, 11, 12, 13, 14,
+ 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 4, 5, 0, 12, 0, 0, 2, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
},
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -533,6 +537,7 @@ func NewLexSpec() *lexSpec {
KindIDColon,
KindIDOr,
KindIDSemicolon,
+ KindIDLabelMarker,
KindIDPosition,
KindIDExpansion,
KindIDDirectiveMarker,
@@ -565,6 +570,7 @@ func NewLexSpec() *lexSpec {
KindNameColon,
KindNameOr,
KindNameSemicolon,
+ KindNameLabelMarker,
KindNamePosition,
KindNameExpansion,
KindNameDirectiveMarker,
@@ -587,7 +593,7 @@ func NewLexSpec() *lexSpec {
{
0, 1, 2, 3, 4, 5, 6, 7, 6, 8, 6, 9, 6, 10, 6, 11, 12, 6, 13, 14,
6, 15, 16, 6, 17, 18, 19, 20, 21, 22, 23, 24, 24, 25, 26, 27, 28, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
},
{
0, 1, 2, 3, 2, 4, 2, 5, 2, 6, 2, 7, 8, 2, 9, 10, 2, 11, 12, 2,
@@ -635,7 +641,7 @@ func NewLexSpec() *lexSpec {
5, 5, 5, 5, 5, 1, 1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, -1, 1, -1, -1, -1, -1,
-1, -1, 1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, -1, -1,
- -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 1, -1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1,
1, -1, -1, -1, -1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
@@ -979,9 +985,9 @@ func NewLexSpec() *lexSpec {
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 11, 13, 13,
15, 18, 18, 18, 21, 2, 37, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 38, 45, 33, 46, 0, 39, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 38, 46, 33, 47, 0, 39, 0, 0, 0, 0,
0, 0, 35, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, 42, 0, 0, 0, 0,
- 0, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
+ 43, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
32, 32, 32, 32, 32, 32, 32, 0, 0, 0, 0, 32, 0, 32, 32, 32, 32, 32, 24, 32,
32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 0,
41, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
@@ -1044,10 +1050,10 @@ func NewLexSpec() *lexSpec {
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
- 22, 22, 22, 43, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
+ 22, 22, 22, 44, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
34, 34, 34, 2, 0, 37, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 5, 36, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 5, 36, 45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,