diff options
-rw-r--r-- | README.md | 37 | ||||
-rw-r--r-- | driver/conflict_test.go | 24 | ||||
-rw-r--r-- | driver/parser_test.go | 126 | ||||
-rw-r--r-- | driver/semantic_action_test.go | 21 | ||||
-rw-r--r-- | driver/syntax_error_test.go | 36 | ||||
-rw-r--r-- | grammar/grammar.go | 66 | ||||
-rw-r--r-- | grammar/semantic_error.go | 2 | ||||
-rw-r--r-- | spec/parser.go | 25 | ||||
-rw-r--r-- | spec/parser_test.go | 170 |
9 files changed, 300 insertions, 207 deletions
@@ -36,7 +36,7 @@ expr | expr mul expr | expr div expr | func_call - | integer + | int | id | '(' expr ')' #ast expr ; @@ -49,13 +49,20 @@ args | expr ; -whitespaces: "[\u{0009}\u{0020}]+" #skip; -integer: "0|[1-9][0-9]*"; -id: "[A-Za-z_][0-9A-Za-z_]*"; -add: '+'; -sub: '-'; -mul: '*'; -div: '/'; +ws #skip + : "[\u{0009}\u{0020}]+"; +int + : "0|[1-9][0-9]*"; +id + : "[A-Za-z_][0-9A-Za-z_]*"; +add + : '+'; +sub + : '-'; +mul + : '*'; +div + : '/'; ``` Save the above grammar to a file in UTF-8. In this explanation, the file name is `expr.vr`. @@ -87,7 +94,7 @@ expr │ ├─ id "foo" │ └─ args │ ├─ expr -│ │ └─ integer "10" +│ │ └─ int "10" │ └─ expr │ └─ func_call │ ├─ id "bar" @@ -97,7 +104,7 @@ expr ├─ add "+" └─ expr ├─ expr - │ └─ integer "99" + │ └─ int "99" ├─ mul "*" └─ expr └─ id "x" @@ -126,8 +133,8 @@ LALR(1) 3 - - x_1 (\() 4 - - x_2 (\)) 5 - - x_3 (,) - 6 - - whitespaces - 7 - - integer + 6 - - ws + 7 - - int 8 - - id 9 2 l add (+) 10 2 l sub (-) @@ -142,7 +149,7 @@ LALR(1) 4 1 l expr → expr * expr 5 1 l expr → expr / expr 6 - - expr → func_call - 7 - - expr → integer + 7 - - expr → int 8 - - expr → id 9 - - expr → \( expr \) 10 - - func_call → id \( args \) @@ -157,7 +164,7 @@ LALR(1) 1 expr' → ・ expr shift 3 on \( -shift 4 on integer +shift 4 on int shift 5 on id goto 1 on expr goto 2 on func_call @@ -289,7 +296,7 @@ expr │ └─ id "foo" ├─ add "+" └─ expr - └─ integer "99" + └─ int "99" ``` ```sh diff --git a/driver/conflict_test.go b/driver/conflict_test.go index f507d4f..1f8914f 100644 --- a/driver/conflict_test.go +++ b/driver/conflict_test.go @@ -179,10 +179,14 @@ expr | id ; -whitespaces: "[\u{0009}\u{0020}]+" #skip; -r1: 'r1'; -r2: 'r2'; -id: "[A-Za-z0-9_]+"; +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +r1 + : 'r1'; +r2 + : 'r2'; +id + : "[A-Za-z0-9_]+"; `, src: `a r2 b r1 c r1 d r2 e`, cst: nonTermNode("expr", @@ -226,10 +230,14 @@ expr | id ; -whitespaces: "[\u{0009}\u{0020}]+" #skip; -r1: 'r1'; -r2: 'r2'; -id: "[A-Za-z0-9_]+"; +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +r1 + : 'r1'; +r2 + : 'r2'; +id + : "[A-Za-z0-9_]+"; `, src: `a r2 b r1 c r1 d r2 e`, cst: nonTermNode("expr", diff --git a/driver/parser_test.go b/driver/parser_test.go index 9a3e040..9d89efa 100644 --- a/driver/parser_test.go +++ b/driver/parser_test.go @@ -245,7 +245,38 @@ bar: "bar"; a : foo ; -foo: "foo" #skip; + +foo #skip + : "foo"; +`, + src: `foo`, + specErr: true, + }, + // A lexical production cannot have alternative productions. + { + specSrc: ` +%name test + +s + : foo + ; + +foo: 'foo' #skip; +`, + src: `foo`, + specErr: true, + }, + // A directive must not be duplicated. + { + specSrc: ` +%name test + +s + : foo + ; + +foo #skip #skip + : 'foo'; `, src: `foo`, specErr: true, @@ -265,16 +296,16 @@ mode_tran | pop_m2 ; -push_m1 - : "->" #push m1; -push_m2 #mode m1 - : "-->" #push m2; -pop_m1 #mode m1 - : "<-" #pop; -pop_m2 #mode m2 - : "<--" #pop; -whitespace #mode default m1 m2 - : "\u{0020}+" #skip; +push_m1 #push m1 + : "->"; +push_m2 #mode m1 #push m2 + : "-->"; +pop_m1 #mode m1 #pop + : "<-"; +pop_m2 #mode m2 #pop + : "<--"; +whitespace #mode default m1 m2 #skip + : "\u{0020}+"; `, src: ` -> --> <-- <- `, }, @@ -301,9 +332,13 @@ bar #mode default s : foo bar ; -foo: "foo"; -bar: "bar"; -white_space: "[\u{0009}\u{0020}]+" #skip; + +foo + : "foo"; +bar + : "bar"; +white_space #skip + : "[\u{0009}\u{0020}]+"; `, src: `foo bar`, }, @@ -332,8 +367,11 @@ elems : elems "," id #ast elems... id | id ; -whitespace: "\u{0020}+" #skip; -id: "[A-Za-z]+"; + +whitespace #skip + : "\u{0020}+"; +id + : "[A-Za-z]+"; `, src: `[Byers, Frohike, Langly]`, cst: nonTermNode("list", @@ -390,20 +428,6 @@ num: "0|[1-9][0-9]*"; ), ), }, - // An ast action cannot be applied to a terminal symbol. - { - specSrc: ` -%name test - -s - : foo - ; -foo - : "foo"@f #ast f... - ; -`, - specErr: true, - }, // The expansion cannot be applied to a terminal symbol. { specSrc: ` @@ -636,12 +660,18 @@ expr | int ; -ws: "[\u{0009}\u{0020}]+" #skip; -int: "0|[1-9][0-9]*"; -add: '+'; -sub: '-'; -mul: '*'; -div: '/'; +ws #skip + : "[\u{0009}\u{0020}]+"; +int + : "0|[1-9][0-9]*"; +add + : '+'; +sub + : '-'; +mul + : '*'; +div + : '/'; `, // This source is recognized as the following structure because the production `expr → sub expr` // has the `#prec mul` directive and has the same precedence and associativity of the symbol `mul`. @@ -729,8 +759,10 @@ s | error ';' ; -ws: "[\u{0009}\u{0020}]+" #skip; -id: "[A-Za-z_]+"; +ws #skip + : "[\u{0009}\u{0020}]+"; +id + : "[A-Za-z_]+"; `, src: `foo bar baz ;`, }, @@ -748,8 +780,10 @@ elem | error ';' #recover ; -ws: "[\u{0009}\u{0020}]+" #skip; -id: "[A-Za-z_]+"; +ws #skip + : "[\u{0009}\u{0020}]+"; +id + : "[A-Za-z_]+"; `, src: `a b c ; d e f ;`, }, @@ -767,8 +801,10 @@ elem | error ';' #recover foo ; -ws: "[\u{0009}\u{0020}]+" #skip; -id: "[A-Za-z_]+"; +ws #skip + : "[\u{0009}\u{0020}]+"; +id + : "[A-Za-z_]+"; `, src: `a b c ; d e f ;`, specErr: true, @@ -814,8 +850,10 @@ s : foo ; -foo: 'foo'; -error: 'error' #skip; +foo + : 'foo'; +error #skip + : 'error'; `, specErr: true, }, diff --git a/driver/semantic_action_test.go b/driver/semantic_action_test.go index 791b5f0..f9708b7 100644 --- a/driver/semantic_action_test.go +++ b/driver/semantic_action_test.go @@ -59,10 +59,14 @@ elem : char char char ; -ws: "[\u{0009}\u{0020}]+" #skip; -semicolon: ';'; -star: '*'; -char: "[a-z]"; +ws #skip + : "[\u{0009}\u{0020}]+"; +semicolon + : ';'; +star + : '*'; +char + : "[a-z]"; ` specSrcWithoutErrorProd := ` @@ -76,9 +80,12 @@ elem : char char char ; -ws: "[\u{0009}\u{0020}]+" #skip; -semicolon: ';'; -char: "[a-z]"; +ws #skip + : "[\u{0009}\u{0020}]+"; +semicolon + : ';'; +char + : "[a-z]"; ` tests := []struct { diff --git a/driver/syntax_error_test.go b/driver/syntax_error_test.go index a7f363f..329ccef 100644 --- a/driver/syntax_error_test.go +++ b/driver/syntax_error_test.go @@ -44,10 +44,14 @@ elem : a b c ; -ws: "[\u{0009}\u{0020}]+" #skip; -a: 'a'; -b: 'b'; -c: 'c'; +ws #skip + : "[\u{0009}\u{0020}]+"; +a + : 'a'; +b + : 'b'; +c + : 'c'; `, src: `!; a!; ab!;`, synErrCount: 3, @@ -66,10 +70,14 @@ elem : a b c ; -ws: "[\u{0009}\u{0020}]+" #skip; -a: 'a'; -b: 'b'; -c: 'c'; +ws #skip + : "[\u{0009}\u{0020}]+"; +a + : 'a'; +b + : 'b'; +c + : 'c'; `, // After the parser trasits to the error state reading the first invalid symbol ('!'), // the second and third invalid symbols ('!') are ignored. @@ -90,10 +98,14 @@ elem : a b c ; -ws: "[\u{0009}\u{0020}]+" #skip; -a: 'a'; -b: 'b'; -c: 'c'; +ws #skip + : "[\u{0009}\u{0020}]+"; +a + : 'a'; +b + : 'b'; +c + : 'c'; `, src: `!**; a!**; ab!**; abc!`, synErrCount: 4, diff --git a/grammar/grammar.go b/grammar/grammar.go index 9e8f30a..c5726d7 100644 --- a/grammar/grammar.go +++ b/grammar/grammar.go @@ -472,9 +472,34 @@ func (b *GrammarBuilder) genSymbolTableAndLexSpec(root *spec.RootNode) (*symbolT } func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, string, *verr.SpecError, error) { + alt := prod.RHS[0] + elem := alt.Elements[0] + + var pattern string + var alias string + if elem.Literally { + pattern = mlspec.EscapePattern(elem.Pattern) + alias = elem.Pattern + } else { + pattern = elem.Pattern + } + var modes []mlspec.LexModeName - if prod.Directive != nil { - dir := prod.Directive + var skip bool + var push mlspec.LexModeName + var pop bool + dirConsumed := map[string]struct{}{} + for _, dir := range prod.Directives { + if _, consumed := dirConsumed[dir.Name]; consumed { + return nil, false, "", &verr.SpecError{ + Cause: semErrDuplicateDir, + Detail: dir.Name, + Row: dir.Pos.Row, + Col: dir.Pos.Col, + }, nil + } + dirConsumed[dir.Name] = struct{}{} + switch dir.Name { case "mode": if len(dir.Parameters) == 0 { @@ -496,34 +521,6 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, string, *ve } modes = append(modes, mlspec.LexModeName(param.ID)) } - default: - return nil, false, "", &verr.SpecError{ - Cause: semErrDirInvalidName, - Detail: dir.Name, - Row: dir.Pos.Row, - Col: dir.Pos.Col, - }, nil - } - } - - alt := prod.RHS[0] - elem := alt.Elements[0] - - var pattern string - var alias string - if elem.Literally { - pattern = mlspec.EscapePattern(elem.Pattern) - alias = elem.Pattern - } else { - pattern = elem.Pattern - } - - var skip bool - var push mlspec.LexModeName - var pop bool - if alt.Directive != nil { - dir := alt.Directive - switch dir.Name { case "skip": if len(dir.Parameters) > 0 { return nil, false, "", &verr.SpecError{ @@ -574,6 +571,15 @@ func genLexEntry(prod *spec.ProductionNode) (*mlspec.LexEntry, bool, string, *ve } } + if alt.Directive != nil { + return nil, false, "", &verr.SpecError{ + Cause: semErrInvalidAltDir, + Detail: "a lexical production cannot have alternative directives", + Row: alt.Directive.Pos.Row, + Col: alt.Directive.Pos.Col, + }, nil + } + return &mlspec.LexEntry{ Modes: modes, Kind: mlspec.LexKindName(prod.LHS), diff --git a/grammar/semantic_error.go b/grammar/semantic_error.go index 4326d81..7ff1ba7 100644 --- a/grammar/semantic_error.go +++ b/grammar/semantic_error.go @@ -31,4 +31,6 @@ var ( semErrInvalidLabel = newSemanticError("a label must differ from terminal symbols or non-terminal symbols") semErrDirInvalidName = newSemanticError("invalid directive name") semErrDirInvalidParam = newSemanticError("invalid parameter") + semErrDuplicateDir = newSemanticError("a directive must not be duplicated") + semErrInvalidAltDir = newSemanticError("invalid alternative directive") ) diff --git a/spec/parser.go b/spec/parser.go index be20e7d..fd11f83 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -16,10 +16,10 @@ type RootNode struct { } type ProductionNode struct { - Directive *DirectiveNode - LHS string - RHS []*AlternativeNode - Pos Position + Directives []*DirectiveNode + LHS string + RHS []*AlternativeNode + Pos Position } func (n *ProductionNode) isLexical() bool { @@ -312,7 +312,14 @@ func (p *parser) parseProduction() *ProductionNode { lhs := p.lastTok.text lhsPos := p.lastTok.pos - dir := p.parseDirective() + var dirs []*DirectiveNode + for { + dir := p.parseDirective() + if dir == nil { + break + } + dirs = append(dirs, dir) + } p.consume(tokenKindNewline) @@ -345,10 +352,10 @@ func (p *parser) parseProduction() *ProductionNode { } return &ProductionNode{ - Directive: dir, - LHS: lhs, - RHS: rhs, - Pos: lhsPos, + Directives: dirs, + LHS: lhs, + RHS: rhs, + Pos: lhsPos, } } diff --git a/spec/parser_test.go b/spec/parser_test.go index 24e9468..e579fad 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -30,8 +30,8 @@ func TestParse(t *testing.T) { prod.Pos = pos return prod } - withProdDir := func(prod *ProductionNode, dir *DirectiveNode) *ProductionNode { - prod.Directive = dir + withProdDir := func(prod *ProductionNode, dirs ...*DirectiveNode) *ProductionNode { + prod.Directives = dirs return prod } alt := func(elems ...*ElementNode) *AlternativeNode { @@ -299,16 +299,16 @@ mode_tran | pop_m2 ; -push_m1 - : "->" #push m1; -push_m2 #mode m1 - : "-->" #push m2; -pop_m1 #mode m1 - : "<-" #pop; -pop_m2 #mode m2 - : "<--" #pop; -whitespace #mode default m1 m2 - : "\u{0020}+" #skip; +push_m1 #push m1 + : "->"; +push_m2 #mode m1 #push m2 + : "-->"; +pop_m1 #mode m1 #pop + : "<-"; +pop_m2 #mode m2 #pop + : "<--"; +whitespace #mode default m1 m2 #skip + : "\u{0020}+"; `, ast: &RootNode{ Productions: []*ProductionNode{ @@ -324,47 +324,39 @@ whitespace #mode default m1 m2 ), }, LexProductions: []*ProductionNode{ - prod("push_m1", - withAltDir( + withProdDir( + prod("push_m1", alt(pat(`->`)), - dir("push", idParam("m1")), ), + dir("push", idParam("m1")), ), withProdDir( prod("push_m2", - withAltDir( - alt(pat(`-->`)), - dir("push", idParam("m2")), - ), + alt(pat(`-->`)), ), dir("mode", idParam("m1")), + dir("push", idParam("m2")), ), withProdDir( prod("pop_m1", - withAltDir( - alt(pat(`<-`)), - dir("pop"), - ), + alt(pat(`<-`)), ), dir("mode", idParam("m1")), + dir("pop"), ), withProdDir( prod("pop_m2", - withAltDir( - alt(pat(`<--`)), - dir("pop"), - ), + alt(pat(`<--`)), ), dir("mode", idParam("m2")), + dir("pop"), ), withProdDir( prod("whitespace", - withAltDir( - alt(pat(`\u{0020}+`)), - dir("skip"), - ), + alt(pat(`\u{0020}+`)), ), dir("mode", idParam("default"), idParam("m1"), idParam("m2")), + dir("skip"), ), }, }, @@ -425,10 +417,15 @@ exp : exp "\+" id #ast exp id | id ; -whitespace: "\u{0020}+" #skip; -id: "\f{letter}(\f{letter}|\f{number})*"; -fragment letter: "[A-Za-z_]"; -fragment number: "[0-9]"; + +whitespace #skip + : "\u{0020}+"; +id + : "\f{letter}(\f{letter}|\f{number})*"; +fragment letter + : "[A-Za-z_]"; +fragment number + : "[0-9]"; `, checkPosition: true, ast: &RootNode{ @@ -464,24 +461,24 @@ fragment number: "[0-9]"; }, LexProductions: []*ProductionNode{ withProdPos( - prod("whitespace", - withAltPos( - withAltDir( + withProdDir( + prod("whitespace", + withAltPos( alt( withElemPos( pat(`\u{0020}+`), - newPos(6), + newPos(8), ), ), - withDirPos( - dir("skip"), - newPos(6), - ), + newPos(8), ), - newPos(6), + ), + withDirPos( + dir("skip"), + newPos(7), ), ), - newPos(6), + newPos(7), ), withProdPos( prod("id", @@ -489,23 +486,23 @@ fragment number: "[0-9]"; alt( withElemPos( pat(`\f{letter}(\f{letter}|\f{number})*`), - newPos(7), + newPos(10), ), ), - newPos(7), + newPos(10), ), ), - newPos(7), + newPos(9), ), }, Fragments: []*FragmentNode{ withFragmentPos( frag("letter", "[A-Za-z_]"), - newPos(8), + newPos(11), ), withFragmentPos( frag("number", "[0-9]"), - newPos(9), + newPos(13), ), }, }, @@ -614,14 +611,22 @@ s | id r1 id r2 id r3 id ; -whitespaces: "[\u{0009}\u{0020}]+" #skip; -l1: 'l1'; -l2: 'l2'; -l3: 'l3'; -r1: 'r1'; -r2: 'r2'; -r3: 'r3'; -id: "[A-Za-z0-9_]+"; +whitespaces #skip + : "[\u{0009}\u{0020}]+"; +l1 + : 'l1'; +l2 + : 'l2'; +l3 + : 'l3'; +r1 + : 'r1'; +r2 + : 'r2'; +r3 + : 'r3'; +id + : "[A-Za-z0-9_]+"; `, ast: &RootNode{ MetaData: []*DirectiveNode{ @@ -659,11 +664,11 @@ id: "[A-Za-z0-9_]+"; ), }, LexProductions: []*ProductionNode{ - prod("whitespaces", - withAltDir( + withProdDir( + prod("whitespaces", alt(pat(`[\u{0009}\u{0020}]+`)), - dir("skip"), ), + dir("skip"), ), prod("l1", alt(pat(`l1`))), prod("l2", alt(pat(`l2`))), @@ -713,7 +718,7 @@ func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { t.Fatalf("unexpected length of meta data; want: %v, got: %v", len(expected.MetaData), len(root.MetaData)) } for i, md := range root.MetaData { - testDirective(t, md, expected.MetaData[i], true) + testDirectives(t, []*DirectiveNode{md}, []*DirectiveNode{expected.MetaData[i]}, true) } for i, prod := range root.Productions { testProductionNode(t, prod, expected.Productions[i], checkPosition) @@ -728,14 +733,11 @@ func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) { t.Helper() - if expected.Directive == nil && prod.Directive != nil { - t.Fatalf("unexpected directive; want: nil, got: %+v", prod.Directive) + if len(expected.Directives) != len(prod.Directives) { + t.Fatalf("unexpected directive count; want: %v directives, got: %v directives", len(expected.Directives), len(prod.Directives)) } - if expected.Directive != nil { - if prod.Directive == nil { - t.Fatalf("a directive is not set; want: %+v, got: nil", expected.Directive) - } - testDirective(t, prod.Directive, expected.Directive, checkPosition) + if len(expected.Directives) > 0 { + testDirectives(t, prod.Directives, expected.Directives, checkPosition) } if prod.LHS != expected.LHS { t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS) @@ -779,7 +781,7 @@ func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosi if alt.Directive == nil { t.Fatalf("a directive is not set; want: %+v, got: nil", expected.Directive) } - testDirective(t, alt.Directive, expected.Directive, checkPosition) + testDirectives(t, []*DirectiveNode{alt.Directive}, []*DirectiveNode{expected.Directive}, checkPosition) } if checkPosition { testPosition(t, alt.Pos, expected.Pos) @@ -799,19 +801,23 @@ func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bo } } -func testDirective(t *testing.T, dir, expected *DirectiveNode, checkPosition bool) { +func testDirectives(t *testing.T, dirs, expected []*DirectiveNode, checkPosition bool) { t.Helper() - if expected.Name != dir.Name { - t.Fatalf("unexpected directive name; want: %+v, got: %+v", expected.Name, dir.Name) - } - if len(expected.Parameters) != len(dir.Parameters) { - t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", expected.Parameters, dir.Parameters) - } - for i, param := range dir.Parameters { - testParameter(t, param, expected.Parameters[i], checkPosition) - } - if checkPosition { - testPosition(t, dir.Pos, expected.Pos) + for i, exp := range expected { + dir := dirs[i] + + if exp.Name != dir.Name { + t.Fatalf("unexpected directive name; want: %+v, got: %+v", exp.Name, dir.Name) + } + if len(exp.Parameters) != len(dir.Parameters) { + t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", exp.Parameters, dir.Parameters) + } + for j, expParam := range exp.Parameters { + testParameter(t, dir.Parameters[j], expParam, checkPosition) + } + if checkPosition { + testPosition(t, dir.Pos, exp.Pos) + } } } |