aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README.md2
-rw-r--r--driver/parser_test.go14
-rw-r--r--grammar/grammar_test.go2
-rw-r--r--spec/grammar/parser.go17
-rw-r--r--spec/grammar/parser_test.go85
-rw-r--r--spec/grammar/syntax_error.go1
6 files changed, 102 insertions, 19 deletions
diff --git a/README.md b/README.md
index 3718437..9c8d649 100644
--- a/README.md
+++ b/README.md
@@ -302,7 +302,7 @@ Alternative:
<element-1> <element-2> ... <element-N>
```
-An element an alternative contains is a terminal symbol, a non-terminal symbol, a pattern, or a string literal.
+Each element of an alternative is a terminal symbol, a non-terminal symbol, or a string literal. Unlike string literals, patterns cannot appear directly in alternatives.
You can define terminal symbols in the same grammar as non-terminal symbols.
diff --git a/driver/parser_test.go b/driver/parser_test.go
index 9e232f7..215988d 100644
--- a/driver/parser_test.go
+++ b/driver/parser_test.go
@@ -50,18 +50,20 @@ func TestParser_Parse(t *testing.T) {
#name test;
expr
- : expr "\+" term
+ : expr '+' term
| term
;
term
- : term "\*" factor
+ : term '*' factor
| factor
;
factor
- : "\(" expr "\)"
+ : '(' expr ')'
| id
;
-id: "[A-Za-z_][0-9A-Za-z_]*";
+
+id
+ : "[A-Za-z_][0-9A-Za-z_]*";
`,
src: `(a+(b+c))*d+e`,
cst: nonTermNode("expr",
@@ -404,10 +406,10 @@ fragment words: "[A-Za-z\u{0020}]+";
#name test;
list
- : "\[" elems "]" #ast elems...
+ : '[' elems ']' #ast elems...
;
elems
- : elems "," id #ast elems... id
+ : elems ',' id #ast elems... id
| id
;
diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go
index 52c7fb8..5d18029 100644
--- a/grammar/grammar_test.go
+++ b/grammar/grammar_test.go
@@ -2686,7 +2686,7 @@ foo
#name test;
s
- : foo "bar"@b #ast foo b...
+ : foo 'bar'@b #ast foo b...
;
foo
diff --git a/spec/grammar/parser.go b/spec/grammar/parser.go
index 2bb6a4a..946d877 100644
--- a/spec/grammar/parser.go
+++ b/spec/grammar/parser.go
@@ -338,12 +338,27 @@ func (p *parser) parseProduction() *ProductionNode {
}
}
- return &ProductionNode{
+ prod := &ProductionNode{
Directives: dirs,
LHS: lhs,
RHS: rhs,
Pos: lhsPos,
}
+
+ // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs.
+ // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate
+ // name to the pattern. Therefore, this code prohibits alternatives from containing patterns.
+ if !prod.isLexical() {
+ for _, alt := range prod.RHS {
+ for _, elem := range alt.Elements {
+ if elem.Pattern != "" && !elem.Literally {
+ raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt)
+ }
+ }
+ }
+ }
+
+ return prod
}
func (p *parser) parseAlternative() *AlternativeNode {
diff --git a/spec/grammar/parser_test.go b/spec/grammar/parser_test.go
index dcacd7d..4fd7e9f 100644
--- a/spec/grammar/parser_test.go
+++ b/spec/grammar/parser_test.go
@@ -272,23 +272,25 @@ func TestParse(t *testing.T) {
{
caption: "multiple productions are a valid grammar",
src: `
-e: e "\+|-" t | t;
-t: t "\*|/" f | f;
-f: "\(" e ")" | id;
+e: e '+' t | e '-' t | t;
+t: t '*' f | t '/' f | f;
+f: '(' e ')' | id;
id: "[A-Za-z_][0-9A-Za-z_]*";
`,
ast: &RootNode{
Productions: []*ProductionNode{
prod("e",
- alt(id("e"), pat(`\+|-`), id("t")),
+ alt(id("e"), pat(`+`), id("t")),
+ alt(id("e"), pat(`-`), id("t")),
alt(id("t")),
),
prod("t",
- alt(id("t"), pat(`\*|/`), id("f")),
+ alt(id("t"), pat(`*`), id("f")),
+ alt(id("t"), pat(`/`), id("f")),
alt(id("f")),
),
prod("f",
- alt(pat(`\(`), id("e"), pat(`)`)),
+ alt(pat(`(`), id("e"), pat(`)`)),
alt(id("id")),
),
},
@@ -302,8 +304,8 @@ id: "[A-Za-z_][0-9A-Za-z_]*";
{
caption: "productions can contain the empty alternative",
src: `
-a: "foo" | ;
-b: | "bar";
+a: 'foo' | ;
+b: | 'bar';
c: ;
`,
ast: &RootNode{
@@ -330,6 +332,69 @@ a: $x;
synErr: synErrNoSemicolon,
},
{
+ caption: "an alternative can contain a string literal without a terminal symbol",
+ src: `
+s
+ : 'foo' bar
+ ;
+
+bar
+ : 'bar';
+`,
+ ast: &RootNode{
+ Productions: []*ProductionNode{
+ prod("s",
+ alt(pat(`foo`), id("bar")),
+ ),
+ },
+ LexProductions: []*ProductionNode{
+ prod("bar",
+ alt(pat(`bar`)),
+ ),
+ },
+ },
+ },
+ {
+ caption: "an alternative cannot contain a pattern directly",
+ src: `
+s
+ : "foo" bar
+ ;
+
+bar
+ : "bar";
+`,
+ synErr: synErrPatternInAlt,
+ },
+ {
+ caption: "a terminal symbol can be defined using a string literal",
+ src: `
+foo
+ : 'foo';
+`,
+ ast: &RootNode{
+ LexProductions: []*ProductionNode{
+ prod("foo",
+ alt(pat(`foo`)),
+ ),
+ },
+ },
+ },
+ {
+ caption: "a terminal symbol can be defined using a pattern",
+ src: `
+foo
+ : "foo";
+`,
+ ast: &RootNode{
+ LexProductions: []*ProductionNode{
+ prod("foo",
+ alt(pat(`foo`)),
+ ),
+ },
+ },
+ },
+ {
caption: "`fragment` is a reserved word",
src: `fragment: 'fragment';`,
synErr: synErrNoProductionName,
@@ -656,7 +721,7 @@ a
caption: "an AST has node positions",
src: `
exp
- : exp "\+" id #ast exp id
+ : exp '+' id #ast exp id
| id
;
@@ -678,7 +743,7 @@ fragment number
withAltDir(
alt(
withElemPos(id("exp"), newPos(3)),
- withElemPos(pat(`\+`), newPos(3)),
+ withElemPos(pat(`+`), newPos(3)),
withElemPos(id("id"), newPos(3)),
),
withDirPos(
diff --git a/spec/grammar/syntax_error.go b/spec/grammar/syntax_error.go
index 1fec801..1f9664b 100644
--- a/spec/grammar/syntax_error.go
+++ b/spec/grammar/syntax_error.go
@@ -37,6 +37,7 @@ var (
synErrNoDirectiveName = newSyntaxError("a directive needs a name")
synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing")
synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )")
+ synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal")
synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier")
synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier")
synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline")