diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-07-18 17:09:52 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-07-18 17:09:52 +0900 |
commit | 4417b9fe9a223aff00c2ef277d74606b0859004c (patch) | |
tree | 582172bc594680011eacf1bde0af58d8f108216b | |
parent | Refactor (diff) | |
download | cotia-4417b9fe9a223aff00c2ef277d74606b0859004c.tar.gz cotia-4417b9fe9a223aff00c2ef277d74606b0859004c.tar.xz |
Add token positions to an AST
-rw-r--r-- | spec/lexer.go | 16 | ||||
-rw-r--r-- | spec/parser.go | 34 | ||||
-rw-r--r-- | spec/parser_test.go | 217 |
3 files changed, 238 insertions, 29 deletions
diff --git a/spec/lexer.go b/spec/lexer.go index 3a5a914..83b1a93 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -34,12 +34,12 @@ const ( tokenKindInvalid = tokenKind("invalid") ) -type position struct { +type Position struct { row int } -func newPosition(row int) position { - return position{ +func newPosition(row int) Position { + return Position{ row: row, } } @@ -48,17 +48,17 @@ type token struct { kind tokenKind text string num int - pos position + pos Position } -func newSymbolToken(kind tokenKind, pos position) *token { +func newSymbolToken(kind tokenKind, pos Position) *token { return &token{ kind: kind, pos: pos, } } -func newIDToken(text string, pos position) *token { +func newIDToken(text string, pos Position) *token { return &token{ kind: tokenKindID, text: text, @@ -66,7 +66,7 @@ func newIDToken(text string, pos position) *token { } } -func newTerminalPatternToken(text string, pos position) *token { +func newTerminalPatternToken(text string, pos Position) *token { return &token{ kind: tokenKindTerminalPattern, text: text, @@ -74,7 +74,7 @@ func newTerminalPatternToken(text string, pos position) *token { } } -func newPositionToken(num int, pos position) *token { +func newPositionToken(num int, pos Position) *token { return &token{ kind: tokenKindPosition, num: num, diff --git a/spec/parser.go b/spec/parser.go index e749d72..7f37598 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -17,6 +17,7 @@ type ProductionNode struct { Directive *DirectiveNode LHS string RHS []*AlternativeNode + Pos Position } func (n *ProductionNode) isLexical() bool { @@ -29,36 +30,43 @@ func (n *ProductionNode) isLexical() bool { type AlternativeNode struct { Elements []*ElementNode Directive *DirectiveNode + Pos Position } type ElementNode struct { ID string Pattern string + Pos Position } type DirectiveNode struct { Name string Parameters []*ParameterNode + Pos Position } type ParameterNode struct { ID string Tree *TreeStructNode + Pos Position } type TreeStructNode struct { Name string Children []*TreeChildNode + Pos Position } type TreeChildNode struct { Position int Expansion bool + Pos Position } type FragmentNode struct { LHS string RHS string + Pos Position } func raiseSyntaxError(row int, synErr *SyntaxError) { @@ -85,7 +93,7 @@ type parser struct { // A token position that the parser read at last. // It is used as additional information in error messages. - pos position + pos Position } func newParser(src io.Reader) (*parser, error) { @@ -181,6 +189,7 @@ func (p *parser) parseFragment() *FragmentNode { raiseSyntaxError(p.pos.row, synErrNoProductionName) } lhs := p.lastTok.text + lhsPos := p.lastTok.pos p.consume(tokenKindNewline) @@ -208,6 +217,7 @@ func (p *parser) parseFragment() *FragmentNode { return &FragmentNode{ LHS: lhs, RHS: rhs, + Pos: lhsPos, } } @@ -246,6 +256,7 @@ func (p *parser) parseProduction() *ProductionNode { raiseSyntaxError(p.pos.row, synErrNoProductionName) } lhs := p.lastTok.text + lhsPos := p.lastTok.pos p.consume(tokenKindNewline) @@ -281,6 +292,7 @@ func (p *parser) parseProduction() *ProductionNode { Directive: dir, LHS: lhs, RHS: rhs, + Pos: lhsPos, } } @@ -294,11 +306,18 @@ func (p *parser) parseAlternative() *AlternativeNode { elems = append(elems, elem) } + // When a length of an alternative is zero, we cannot set a position. + var firstElemPos Position + if len(elems) > 0 { + firstElemPos = elems[0].Pos + } + dir := p.parseDirective() return &AlternativeNode{ Elements: elems, Directive: dir, + Pos: firstElemPos, } } @@ -306,11 +325,13 @@ func (p *parser) parseElement() *ElementNode { switch { case p.consume(tokenKindID): return &ElementNode{ - ID: p.lastTok.text, + ID: p.lastTok.text, + Pos: p.lastTok.pos, } case p.consume(tokenKindTerminalPattern): return &ElementNode{ Pattern: p.lastTok.text, + Pos: p.lastTok.pos, } } return nil @@ -320,6 +341,7 @@ func (p *parser) parseDirective() *DirectiveNode { if !p.consume(tokenKindDirectiveMarker) { return nil } + dirPos := p.lastTok.pos if !p.consume(tokenKindID) { raiseSyntaxError(p.pos.row, synErrNoDirectiveName) @@ -338,6 +360,7 @@ func (p *parser) parseDirective() *DirectiveNode { return &DirectiveNode{ Name: name, Parameters: params, + Pos: dirPos, } } @@ -345,13 +368,15 @@ func (p *parser) parseParameter() *ParameterNode { switch { case p.consume(tokenKindID): return &ParameterNode{ - ID: p.lastTok.text, + ID: p.lastTok.text, + Pos: p.lastTok.pos, } case p.consume(tokenKindTreeNodeOpen): if !p.consume(tokenKindID) { raiseSyntaxError(p.pos.row, synErrTreeInvalidFirstElem) } name := p.lastTok.text + namePos := p.lastTok.pos var children []*TreeChildNode for { @@ -361,6 +386,7 @@ func (p *parser) parseParameter() *ParameterNode { child := &TreeChildNode{ Position: p.lastTok.num, + Pos: p.lastTok.pos, } if p.consume(tokenKindExpantion) { child.Expansion = true @@ -377,7 +403,9 @@ func (p *parser) parseParameter() *ParameterNode { Tree: &TreeStructNode{ Name: name, Children: children, + Pos: namePos, }, + Pos: namePos, } } diff --git a/spec/parser_test.go b/spec/parser_test.go index c696af0..5a54b83 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -14,6 +14,10 @@ func TestParse(t *testing.T) { RHS: alts, } } + withProdPos := func(prod *ProductionNode, pos Position) *ProductionNode { + prod.Pos = pos + return prod + } withProdDir := func(prod *ProductionNode, dir *DirectiveNode) *ProductionNode { prod.Directive = dir return prod @@ -23,6 +27,10 @@ func TestParse(t *testing.T) { Elements: elems, } } + withAltPos := func(alt *AlternativeNode, pos Position) *AlternativeNode { + alt.Pos = pos + return alt + } withAltDir := func(alt *AlternativeNode, dir *DirectiveNode) *AlternativeNode { alt.Directive = dir return alt @@ -33,6 +41,10 @@ func TestParse(t *testing.T) { Parameters: params, } } + withDirPos := func(dir *DirectiveNode, pos Position) *DirectiveNode { + dir.Pos = pos + return dir + } idParam := func(id string) *ParameterNode { return &ParameterNode{ ID: id, @@ -46,6 +58,10 @@ func TestParse(t *testing.T) { }, } } + withParamPos := func(param *ParameterNode, pos Position) *ParameterNode { + param.Pos = pos + return param + } pos := func(pos int) *TreeChildNode { return &TreeChildNode{ Position: pos, @@ -55,6 +71,10 @@ func TestParse(t *testing.T) { c.Expansion = true return c } + withTreeChildPos := func(child *TreeChildNode, pos Position) *TreeChildNode { + child.Pos = pos + return child + } id := func(id string) *ElementNode { return &ElementNode{ ID: id, @@ -65,18 +85,27 @@ func TestParse(t *testing.T) { Pattern: p, } } + withElemPos := func(elem *ElementNode, pos Position) *ElementNode { + elem.Pos = pos + return elem + } frag := func(lhs string, rhs string) *FragmentNode { return &FragmentNode{ LHS: lhs, RHS: rhs, } } + withFragmentPos := func(frag *FragmentNode, pos Position) *FragmentNode { + frag.Pos = pos + return frag + } tests := []struct { - caption string - src string - ast *RootNode - synErr *SyntaxError + caption string + src string + checkPosition bool + ast *RootNode + synErr *SyntaxError }{ { caption: "single production is a valid grammar", @@ -348,6 +377,114 @@ foo: "foo"; `, synErr: synErrTreeUnclosed, }, + { + caption: "an AST has node positions", + src: ` +#mode default +exp + : exp "\+" id #ast '(exp $1 $2) + | id + ; +whitespace: "\u{0020}+" #skip; +id: "\f{letter}(\f{letter}|\f{number})*"; +fragment letter: "[A-Za-z_]"; +fragment number: "[0-9]"; +`, + checkPosition: true, + ast: &RootNode{ + Productions: []*ProductionNode{ + withProdPos( + withProdDir( + prod("exp", + withAltPos( + withAltDir( + alt( + withElemPos(id("exp"), newPosition(4)), + withElemPos(pat(`\+`), newPosition(4)), + withElemPos(id("id"), newPosition(4)), + ), + withDirPos( + dir("ast", + withParamPos( + treeParam("exp", + withTreeChildPos(pos(1), newPosition(4)), + withTreeChildPos(pos(2), newPosition(4))), + newPosition(4), + ), + ), + newPosition(4), + ), + ), + newPosition(4), + ), + withAltPos( + alt( + withElemPos(id("id"), newPosition(5)), + ), + newPosition(5), + ), + ), + withDirPos( + dir("mode", + withParamPos( + idParam("default"), + newPosition(2), + ), + ), + newPosition(2), + ), + ), + newPosition(3), + ), + }, + LexProductions: []*ProductionNode{ + withProdPos( + prod("whitespace", + withAltPos( + withAltDir( + alt( + withElemPos( + pat(`\u{0020}+`), + newPosition(7), + ), + ), + withDirPos( + dir("skip"), + newPosition(7), + ), + ), + newPosition(7), + ), + ), + newPosition(7), + ), + withProdPos( + prod("id", + withAltPos( + alt( + withElemPos( + pat(`\f{letter}(\f{letter}|\f{number})*`), + newPosition(8), + ), + ), + newPosition(8), + ), + ), + newPosition(8), + ), + }, + Fragments: []*FragmentNode{ + withFragmentPos( + frag("letter", "[A-Za-z_]"), + newPosition(9), + ), + withFragmentPos( + frag("number", "[0-9]"), + newPosition(10), + ), + }, + }, + }, } for _, tt := range tests { t.Run(tt.caption, func(t *testing.T) { @@ -371,26 +508,29 @@ foo: "foo"; if ast == nil { t.Fatalf("AST must be non-nil") } - testRootNode(t, ast, tt.ast) + testRootNode(t, ast, tt.ast, tt.checkPosition) } }) } } -func testRootNode(t *testing.T, root, expected *RootNode) { +func testRootNode(t *testing.T, root, expected *RootNode, checkPosition bool) { t.Helper() if len(root.Productions) != len(expected.Productions) { t.Fatalf("unexpected length of productions; want: %v, got: %v", len(expected.Productions), len(root.Productions)) } for i, prod := range root.Productions { - testProductionNode(t, prod, expected.Productions[i]) + testProductionNode(t, prod, expected.Productions[i], checkPosition) } for i, prod := range root.LexProductions { - testProductionNode(t, prod, expected.LexProductions[i]) + testProductionNode(t, prod, expected.LexProductions[i], checkPosition) + } + for i, frag := range root.Fragments { + testFragmentNode(t, frag, expected.Fragments[i], checkPosition) } } -func testProductionNode(t *testing.T, prod, expected *ProductionNode) { +func testProductionNode(t *testing.T, prod, expected *ProductionNode, checkPosition bool) { t.Helper() if expected.Directive == nil && prod.Directive != nil { t.Fatalf("unexpected directive; want: nil, got: %+v", prod.Directive) @@ -399,7 +539,7 @@ func testProductionNode(t *testing.T, prod, expected *ProductionNode) { if prod.Directive == nil { t.Fatalf("a directive is not set; want: %+v, got: nil", expected.Directive) } - testDirective(t, prod.Directive, expected.Directive) + testDirective(t, prod.Directive, expected.Directive, checkPosition) } if prod.LHS != expected.LHS { t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, prod.LHS) @@ -408,17 +548,33 @@ func testProductionNode(t *testing.T, prod, expected *ProductionNode) { t.Fatalf("unexpected length of an RHS; want: %v, got: %v", len(expected.RHS), len(prod.RHS)) } for i, alt := range prod.RHS { - testAlternativeNode(t, alt, expected.RHS[i]) + testAlternativeNode(t, alt, expected.RHS[i], checkPosition) + } + if checkPosition { + testPosition(t, prod.Pos, expected.Pos) } } -func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode) { +func testFragmentNode(t *testing.T, frag, expected *FragmentNode, checkPosition bool) { + t.Helper() + if frag.LHS != expected.LHS { + t.Fatalf("unexpected LHS; want: %v, got: %v", expected.LHS, frag.LHS) + } + if frag.RHS != expected.RHS { + t.Fatalf("unexpected RHS; want: %v, got: %v", expected.RHS, frag.RHS) + } + if checkPosition { + testPosition(t, frag.Pos, expected.Pos) + } +} + +func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode, checkPosition bool) { t.Helper() if len(alt.Elements) != len(expected.Elements) { t.Fatalf("unexpected length of elements; want: %v, got: %v", len(expected.Elements), len(alt.Elements)) } for i, elem := range alt.Elements { - testElementNode(t, elem, expected.Elements[i]) + testElementNode(t, elem, expected.Elements[i], checkPosition) } if expected.Directive == nil && alt.Directive != nil { t.Fatalf("unexpected directive; want: nil, got: %+v", alt.Directive) @@ -427,18 +583,27 @@ func testAlternativeNode(t *testing.T, alt, expected *AlternativeNode) { if alt.Directive == nil { t.Fatalf("a directive is not set; want: %+v, got: nil", expected.Directive) } - testDirective(t, alt.Directive, expected.Directive) + testDirective(t, alt.Directive, expected.Directive, checkPosition) + } + if checkPosition { + testPosition(t, alt.Pos, expected.Pos) } } -func testElementNode(t *testing.T, elem, expected *ElementNode) { +func testElementNode(t *testing.T, elem, expected *ElementNode, checkPosition bool) { t.Helper() + if elem.ID != expected.ID { + t.Fatalf("unexpected ID; want: %v, got: %v", expected.ID, elem.ID) + } if elem.Pattern != expected.Pattern { t.Fatalf("unexpected pattern; want: %v, got: %v", expected.Pattern, elem.Pattern) } + if checkPosition { + testPosition(t, elem.Pos, expected.Pos) + } } -func testDirective(t *testing.T, dir, expected *DirectiveNode) { +func testDirective(t *testing.T, dir, expected *DirectiveNode, checkPosition bool) { t.Helper() if expected.Name != dir.Name { t.Fatalf("unexpected directive name; want: %+v, got: %+v", expected.Name, dir.Name) @@ -447,11 +612,14 @@ func testDirective(t *testing.T, dir, expected *DirectiveNode) { t.Fatalf("unexpected directive parameter; want: %+v, got: %+v", expected.Parameters, dir.Parameters) } for i, param := range dir.Parameters { - testParameter(t, param, expected.Parameters[i]) + testParameter(t, param, expected.Parameters[i], checkPosition) + } + if checkPosition { + testPosition(t, dir.Pos, expected.Pos) } } -func testParameter(t *testing.T, param, expected *ParameterNode) { +func testParameter(t *testing.T, param, expected *ParameterNode, checkPosition bool) { t.Helper() if param.ID != expected.ID { t.Fatalf("unexpected ID parameter; want: %v, got: %v", expected.ID, param.ID) @@ -474,6 +642,19 @@ func testParameter(t *testing.T, param, expected *ParameterNode) { if c.Position != e.Position || c.Expansion != e.Expansion { t.Fatalf("unexpected child; want: %+v, got: %+v", e, c) } + if checkPosition { + testPosition(t, c.Pos, e.Pos) + } } } + if checkPosition { + testPosition(t, param.Pos, expected.Pos) + } +} + +func testPosition(t *testing.T, pos, expected Position) { + t.Helper() + if pos.row != expected.row { + t.Fatalf("unexpected position want: %+v, got: %+v", expected, pos) + } } |