diff options
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | driver/parser_test.go | 14 | ||||
-rw-r--r-- | grammar/grammar_test.go | 2 | ||||
-rw-r--r-- | spec/grammar/parser.go | 17 | ||||
-rw-r--r-- | spec/grammar/parser_test.go | 85 | ||||
-rw-r--r-- | spec/grammar/syntax_error.go | 1 |
6 files changed, 102 insertions, 19 deletions
@@ -302,7 +302,7 @@ Alternative: <element-1> <element-2> ... <element-N> ``` -An element an alternative contains is a terminal symbol, a non-terminal symbol, a pattern, or a string literal. +An element an alternative contains is a terminal symbol, a non-terminal symbol, or a string literal. Unlike string literals, patterns cannot be contained in alternatives. You can define terminal symbols in the same grammar as non-terminal symbols. diff --git a/driver/parser_test.go b/driver/parser_test.go index 9e232f7..215988d 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -50,18 +50,20 @@ func TestParser_Parse(t *testing.T) { #name test; expr - : expr "\+" term + : expr '+' term | term ; term - : term "\*" factor + : term '*' factor | factor ; factor - : "\(" expr "\)" + : '(' expr ')' | id ; -id: "[A-Za-z_][0-9A-Za-z_]*"; + +id + : "[A-Za-z_][0-9A-Za-z_]*"; `, src: `(a+(b+c))*d+e`, cst: nonTermNode("expr", @@ -404,10 +406,10 @@ fragment words: "[A-Za-z\u{0020}]+"; #name test; list - : "\[" elems "]" #ast elems... + : '[' elems ']' #ast elems... ; elems - : elems "," id #ast elems... id + : elems ',' id #ast elems... id | id ; diff --git a/grammar/grammar_test.go b/grammar/grammar_test.go index 52c7fb8..5d18029 100644 --- a/grammar/grammar_test.go +++ b/grammar/grammar_test.go @@ -2686,7 +2686,7 @@ foo #name test; s - : foo "bar"@b #ast foo b... + : foo 'bar'@b #ast foo b... ; foo diff --git a/spec/grammar/parser.go b/spec/grammar/parser.go index 2bb6a4a..946d877 100644 --- a/spec/grammar/parser.go +++ b/spec/grammar/parser.go @@ -338,12 +338,27 @@ func (p *parser) parseProduction() *ProductionNode { } } - return &ProductionNode{ + prod := &ProductionNode{ Directives: dirs, LHS: lhs, RHS: rhs, Pos: lhsPos, } + + // Vartan's driver must provide a user with the names of expected tokens when a syntax error occurs. + // However, if a pattern appears directly in an alternative, Vartan's compiler cannot assign an appropriate + // name to the pattern. Therefore, this code prohibits alternatives from containing patterns. + if !prod.isLexical() { + for _, alt := range prod.RHS { + for _, elem := range alt.Elements { + if elem.Pattern != "" && !elem.Literally { + raiseSyntaxError(elem.Pos.Row, synErrPatternInAlt) + } + } + } + } + + return prod } func (p *parser) parseAlternative() *AlternativeNode { diff --git a/spec/grammar/parser_test.go b/spec/grammar/parser_test.go index dcacd7d..4fd7e9f 100644 --- a/spec/grammar/parser_test.go +++ b/spec/grammar/parser_test.go @@ -272,23 +272,25 @@ func TestParse(t *testing.T) { { caption: "multiple productions are a valid grammar", src: ` -e: e "\+|-" t | t; -t: t "\*|/" f | f; -f: "\(" e ")" | id; +e: e '+' t | e '-' t | t; +t: t '*' f | t '/' f | f; +f: '(' e ')' | id; id: "[A-Za-z_][0-9A-Za-z_]*"; `, ast: &RootNode{ Productions: []*ProductionNode{ prod("e", - alt(id("e"), pat(`\+|-`), id("t")), + alt(id("e"), pat(`+`), id("t")), + alt(id("e"), pat(`-`), id("t")), alt(id("t")), ), prod("t", - alt(id("t"), pat(`\*|/`), id("f")), + alt(id("t"), pat(`*`), id("f")), + alt(id("t"), pat(`/`), id("f")), alt(id("f")), ), prod("f", - alt(pat(`\(`), id("e"), pat(`)`)), + alt(pat(`(`), id("e"), pat(`)`)), alt(id("id")), ), }, @@ -302,8 +304,8 @@ id: "[A-Za-z_][0-9A-Za-z_]*"; { caption: "productions can contain the empty alternative", src: ` -a: "foo" | ; -b: | "bar"; +a: 'foo' | ; +b: | 'bar'; c: ; `, ast: &RootNode{ @@ -330,6 +332,69 @@ a: $x; synErr: synErrNoSemicolon, }, { + caption: "an alternative can contain a string literal without a terminal symbol", + src: ` +s + : 'foo' bar + ; + +bar + : 'bar'; +`, + ast: &RootNode{ + Productions: []*ProductionNode{ + prod("s", + alt(pat(`foo`), id("bar")), + ), + }, + LexProductions: []*ProductionNode{ + prod("bar", + alt(pat(`bar`)), + ), + }, + }, + }, + { + caption: "an alternative cannot contain a pattern directly", + src: ` +s + : "foo" bar + ; + +bar + : "bar"; +`, + synErr: synErrPatternInAlt, + }, + { + caption: "a terminal symbol can be defined using a string literal", + src: ` +foo + : 'foo'; +`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + prod("foo", + alt(pat(`foo`)), + ), + }, + }, + }, + { + caption: "a terminal symbol can be defined using a pattern", + src: ` +foo + : "foo"; +`, + ast: &RootNode{ + LexProductions: []*ProductionNode{ + prod("foo", + alt(pat(`foo`)), + ), + }, + }, + }, + { caption: "`fragment` is a reserved word", src: `fragment: 'fragment';`, synErr: synErrNoProductionName, @@ -656,7 +721,7 @@ a caption: "an AST has node positions", src: ` exp - : exp "\+" id #ast exp id + : exp '+' id #ast exp id | id ; @@ -678,7 +743,7 @@ fragment number withAltDir( alt( withElemPos(id("exp"), newPos(3)), - withElemPos(pat(`\+`), newPos(3)), + withElemPos(pat(`+`), newPos(3)), withElemPos(id("id"), newPos(3)), ), withDirPos( diff --git a/spec/grammar/syntax_error.go b/spec/grammar/syntax_error.go index 1fec801..1f9664b 100644 --- a/spec/grammar/syntax_error.go +++ b/spec/grammar/syntax_error.go @@ -37,6 +37,7 @@ var ( synErrNoDirectiveName = newSyntaxError("a directive needs a name") synErrNoOrderedSymbolName = newSyntaxError("an ordered symbol name is missing") synErrUnclosedDirGroup = newSyntaxError("a directive group must be closed by )") + synErrPatternInAlt = newSyntaxError("a pattern literal cannot appear directly in an alternative. instead, please define a terminal symbol with the pattern literal") synErrStrayExpOp = newSyntaxError("an expansion operator ... must be preceded by an identifier") synErrInvalidExpOperand = newSyntaxError("an expansion operator ... can be applied to only an identifier") synErrSemicolonNoNewline = newSyntaxError("a semicolon must be followed by a newline") |