diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-07-16 23:48:48 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-07-16 23:48:48 +0900 |
commit | a4620e8c1c7ec79beea1b3468af3c969016123b0 (patch) | |
tree | 05ad4a48731c451172068c7fd617df144305178f | |
parent | Add testing for the driver (diff) | |
download | urubu-a4620e8c1c7ec79beea1b3468af3c969016123b0.tar.gz urubu-a4620e8c1c7ec79beea1b3468af3c969016123b0.tar.xz |
Add a line number to error messages
-rw-r--r-- | error/error.go | 15 | ||||
-rw-r--r-- | spec/lexer.go | 63 | ||||
-rw-r--r-- | spec/lexer_test.go | 74 | ||||
-rw-r--r-- | spec/parser.go | 47 | ||||
-rw-r--r-- | spec/parser_test.go | 8 | ||||
-rw-r--r-- | spec/syntax_error.go | 4 |
6 files changed, 137 insertions, 74 deletions
diff --git a/error/error.go b/error/error.go new file mode 100644 index 0000000..1745c52 --- /dev/null +++ b/error/error.go @@ -0,0 +1,15 @@ +package error + +import "fmt" + +type SpecError struct { + Cause error + Row int +} + +func (e *SpecError) Error() string { + if e.Row == 0 { + return fmt.Sprintf("error: %v", e.Cause) + } + return fmt.Sprintf("%v: error: %v", e.Row, e.Cause) +} diff --git a/spec/lexer.go b/spec/lexer.go index a1bd6fc..b19a771 100644 --- a/spec/lexer.go +++ b/spec/lexer.go @@ -33,36 +33,51 @@ const ( tokenKindInvalid = tokenKind("invalid") ) +type position struct { + row int +} + +func newPosition(row int) position { + return position{ + row: row, + } +} + type token struct { kind tokenKind text string num int + pos position } -func newSymbolToken(kind tokenKind) *token { +func newSymbolToken(kind tokenKind, pos position) *token { return &token{ kind: kind, + pos: pos, } } -func newIDToken(text string) *token { +func newIDToken(text string, pos position) *token { return &token{ kind: tokenKindID, text: text, + pos: pos, } } -func newTerminalPatternToken(text string) *token { +func newTerminalPatternToken(text string, pos position) *token { return &token{ kind: tokenKindTerminalPattern, text: text, + pos: pos, } } -func newPositionToken(num int) *token { +func newPositionToken(num int, pos position) *token { return &token{ kind: tokenKindPosition, num: num, + pos: pos, } } @@ -83,6 +98,7 @@ type lexer struct { s *mlspec.CompiledLexSpec d *mldriver.Lexer buf *token + row int } //go:embed clexspec.json @@ -99,8 +115,9 @@ func newLexer(src io.Reader) (*lexer, error) { return nil, err } return &lexer{ - s: s, - d: d, + s: s, + d: d, + row: 1, }, nil } @@ -111,20 +128,20 @@ func (l *lexer) next() (*token, error) { return tok, nil } - newline := false + var newline *token for { tok, err := l.lexAndSkipWSs() if err != nil { return nil, err } if tok.kind == tokenKindNewline { - newline = true + newline = tok continue } - if newline { + if newline != nil { l.buf = tok - return newSymbolToken(tokenKindNewline), nil + return newline, nil } return tok, nil } @@ -156,14 +173,16 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { switch tok.KindName { case "newline": - return newSymbolToken(tokenKindNewline), nil + row := l.row + l.row++ + return newSymbolToken(tokenKindNewline, newPosition(row)), nil case "kw_fragment": - return newSymbolToken(tokenKindKWFragment), nil + return newSymbolToken(tokenKindKWFragment, newPosition(l.row)), nil case "identifier": if strings.HasPrefix(tok.Text(), "_") { return nil, synErrAutoGenID } - return newIDToken(tok.Text()), nil + return newIDToken(tok.Text(), newPosition(l.row)), nil case "terminal_open": var b strings.Builder for { @@ -181,21 +200,21 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { case "escape_symbol": return nil, synErrIncompletedEscSeq case "terminal_close": - return newTerminalPatternToken(b.String()), nil + return newTerminalPatternToken(b.String(), newPosition(l.row)), nil } } case "colon": - return newSymbolToken(tokenKindColon), nil + return newSymbolToken(tokenKindColon, newPosition(l.row)), nil case "or": - return newSymbolToken(tokenKindOr), nil + return newSymbolToken(tokenKindOr, newPosition(l.row)), nil case "semicolon": - return newSymbolToken(tokenKindSemicolon), nil + return newSymbolToken(tokenKindSemicolon, newPosition(l.row)), nil case "directive_marker": - return newSymbolToken(tokenKindDirectiveMarker), nil + return newSymbolToken(tokenKindDirectiveMarker, newPosition(l.row)), nil case "tree_node_open": - return newSymbolToken(tokenKindTreeNodeOpen), nil + return newSymbolToken(tokenKindTreeNodeOpen, newPosition(l.row)), nil case "tree_node_close": - return newSymbolToken(tokenKindTreeNodeClose), nil + return newSymbolToken(tokenKindTreeNodeClose, newPosition(l.row)), nil case "position": // Remove '$' character and convert to an integer. num, err := strconv.Atoi(tok.Text()[1:]) @@ -205,9 +224,9 @@ func (l *lexer) lexAndSkipWSs() (*token, error) { if num == 0 { return nil, synErrZeroPos } - return newPositionToken(num), nil + return newPositionToken(num, newPosition(l.row)), nil case "expansion": - return newSymbolToken(tokenKindExpantion), nil + return newSymbolToken(tokenKindExpantion, newPosition(l.row)), nil default: return newInvalidToken(tok.Text()), nil } diff --git a/spec/lexer_test.go b/spec/lexer_test.go index 0209b59..a5de136 100644 --- a/spec/lexer_test.go +++ b/spec/lexer_test.go @@ -6,6 +6,22 @@ import ( ) func TestLexer_Run(t *testing.T) { + idTok := func(text string) *token { + return newIDToken(text, newPosition(1)) + } + + termPatTok := func(text string) *token { + return newTerminalPatternToken(text, newPosition(1)) + } + + symTok := func(kind tokenKind) *token { + return newSymbolToken(kind, newPosition(1)) + } + + posTok := func(num int) *token { + return newPositionToken(num, newPosition(1)) + } + tests := []struct { caption string src string @@ -16,16 +32,16 @@ func TestLexer_Run(t *testing.T) { caption: "the lexer can recognize all kinds of tokens", src: `id"terminal":|;#'()$1...`, tokens: []*token{ - newIDToken("id"), - newTerminalPatternToken("terminal"), - newSymbolToken(tokenKindColon), - newSymbolToken(tokenKindOr), - newSymbolToken(tokenKindSemicolon), - newSymbolToken(tokenKindDirectiveMarker), - newSymbolToken(tokenKindTreeNodeOpen), - newSymbolToken(tokenKindTreeNodeClose), - newPositionToken(1), - newSymbolToken(tokenKindExpantion), + idTok("id"), + termPatTok("terminal"), + symTok(tokenKindColon), + symTok(tokenKindOr), + symTok(tokenKindSemicolon), + symTok(tokenKindDirectiveMarker), + symTok(tokenKindTreeNodeOpen), + symTok(tokenKindTreeNodeClose), + posTok(1), + symTok(tokenKindExpantion), newEOFToken(), }, }, @@ -33,7 +49,7 @@ func TestLexer_Run(t *testing.T) { caption: "the lexer can recognize keywords", src: `fragment`, tokens: []*token{ - newSymbolToken(tokenKindKWFragment), + symTok(tokenKindKWFragment), newEOFToken(), }, }, @@ -41,7 +57,7 @@ func TestLexer_Run(t *testing.T) { caption: "the lexer can recognize character sequences and escape sequences in terminal", src: `"abc\"\\"`, tokens: []*token{ - newTerminalPatternToken(`abc"\\`), + termPatTok(`abc"\\`), newEOFToken(), }, }, @@ -49,13 +65,13 @@ func TestLexer_Run(t *testing.T) { caption: "the lexer can recognize newlines and combine consecutive newlines into one", src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A", tokens: []*token{ - newSymbolToken(tokenKindNewline), - newSymbolToken(tokenKindOr), - newSymbolToken(tokenKindNewline), - newSymbolToken(tokenKindOr), - newSymbolToken(tokenKindNewline), - newSymbolToken(tokenKindOr), - newSymbolToken(tokenKindNewline), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), + symTok(tokenKindOr), + symTok(tokenKindNewline), newEOFToken(), }, }, @@ -69,11 +85,11 @@ foo bar // This is the fourth comment. `, tokens: []*token{ - newSymbolToken(tokenKindNewline), - newIDToken("foo"), - newSymbolToken(tokenKindNewline), - newIDToken("bar"), - newSymbolToken(tokenKindNewline), + symTok(tokenKindNewline), + idTok("foo"), + symTok(tokenKindNewline), + idTok("bar"), + symTok(tokenKindNewline), newEOFToken(), }, }, @@ -101,9 +117,9 @@ bar // This is the fourth comment. caption: "the lexer can recognize valid tokens following an invalid token", src: `abc!!!def`, tokens: []*token{ - newIDToken("abc"), + idTok("abc"), newInvalidToken("!!!"), - newIDToken("def"), + idTok("def"), newEOFToken(), }, }, @@ -113,9 +129,9 @@ bar // This is the fourth comment. // \u0020: SP src: "a\u0009b\u0020c", tokens: []*token{ - newIDToken("a"), - newIDToken("b"), - newIDToken("c"), + idTok("a"), + idTok("b"), + idTok("c"), newEOFToken(), }, }, diff --git a/spec/parser.go b/spec/parser.go index da17498..1bd2fb4 100644 --- a/spec/parser.go +++ b/spec/parser.go @@ -2,6 +2,8 @@ package spec import ( "io" + + verr "github.com/nihei9/vartan/error" ) type RootNode struct { @@ -50,8 +52,11 @@ type FragmentNode struct { RHS string } -func raiseSyntaxError(synErr *SyntaxError) { - panic(synErr) +func raiseSyntaxError(row int, synErr *SyntaxError) { + panic(&verr.SpecError{ + Cause: synErr, + Row: row, + }) } func Parse(src io.Reader) (*RootNode, error) { @@ -70,6 +75,10 @@ type parser struct { lex *lexer peekedTok *token lastTok *token + + // A token position that the parser read at last. + // It is used as additional information in error messages. + pos position } func newParser(src io.Reader) (*parser, error) { @@ -114,7 +123,7 @@ func (p *parser) parseRoot() *RootNode { break } if len(prods) == 0 { - raiseSyntaxError(synErrNoProduction) + raiseSyntaxError(0, synErrNoProduction) } return &RootNode{ @@ -131,30 +140,30 @@ func (p *parser) parseFragment() *FragmentNode { p.consume(tokenKindNewline) if !p.consume(tokenKindID) { - raiseSyntaxError(synErrNoProductionName) + raiseSyntaxError(p.pos.row, synErrNoProductionName) } lhs := p.lastTok.text p.consume(tokenKindNewline) if !p.consume(tokenKindColon) { - raiseSyntaxError(synErrNoColon) + raiseSyntaxError(p.pos.row, synErrNoColon) } if !p.consume(tokenKindTerminalPattern) { - raiseSyntaxError(synErrFragmentNoPattern) + raiseSyntaxError(p.pos.row, synErrFragmentNoPattern) } rhs := p.lastTok.text p.consume(tokenKindNewline) if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(synErrNoSemicolon) + raiseSyntaxError(p.pos.row, synErrNoSemicolon) } if !p.consume(tokenKindNewline) { if !p.consume(tokenKindEOF) { - raiseSyntaxError(synErrSemicolonNoNewline) + raiseSyntaxError(p.pos.row, synErrSemicolonNoNewline) } } @@ -172,19 +181,19 @@ func (p *parser) parseProduction() *ProductionNode { dir := p.parseDirective() if dir != nil { if !p.consume(tokenKindNewline) { - raiseSyntaxError(synErrProdDirNoNewline) + raiseSyntaxError(p.pos.row, synErrProdDirNoNewline) } } if !p.consume(tokenKindID) { - raiseSyntaxError(synErrNoProductionName) + raiseSyntaxError(p.pos.row, synErrNoProductionName) } lhs := p.lastTok.text p.consume(tokenKindNewline) if !p.consume(tokenKindColon) { - raiseSyntaxError(synErrNoColon) + raiseSyntaxError(p.pos.row, synErrNoColon) } alt := p.parseAlternative() @@ -202,12 +211,12 @@ func (p *parser) parseProduction() *ProductionNode { p.consume(tokenKindNewline) if !p.consume(tokenKindSemicolon) { - raiseSyntaxError(synErrNoSemicolon) + raiseSyntaxError(p.pos.row, synErrNoSemicolon) } if !p.consume(tokenKindNewline) { if !p.consume(tokenKindEOF) { - raiseSyntaxError(synErrSemicolonNoNewline) + raiseSyntaxError(p.pos.row, synErrSemicolonNoNewline) } } @@ -256,7 +265,7 @@ func (p *parser) parseDirective() *DirectiveNode { } if !p.consume(tokenKindID) { - raiseSyntaxError(synErrNoDirectiveName) + raiseSyntaxError(p.pos.row, synErrNoDirectiveName) } name := p.lastTok.text @@ -283,7 +292,7 @@ func (p *parser) parseParameter() *ParameterNode { } case p.consume(tokenKindTreeNodeOpen): if !p.consume(tokenKindID) { - raiseSyntaxError(synErrTreeInvalidFirstElem) + raiseSyntaxError(p.pos.row, synErrTreeInvalidFirstElem) } name := p.lastTok.text @@ -304,7 +313,7 @@ func (p *parser) parseParameter() *ParameterNode { } if !p.consume(tokenKindTreeNodeClose) { - raiseSyntaxError(synErrTreeUnclosed) + raiseSyntaxError(p.pos.row, synErrTreeUnclosed) } return &ParameterNode{ @@ -330,15 +339,15 @@ func (p *parser) consume(expected tokenKind) bool { panic(err) } } - p.lastTok = tok + p.pos = tok.pos if tok.kind == tokenKindInvalid { - raiseSyntaxError(synErrInvalidToken) + raiseSyntaxError(p.pos.row, synErrInvalidToken) } if tok.kind == expected { + p.lastTok = tok return true } p.peekedTok = tok - p.lastTok = nil return false } diff --git a/spec/parser_test.go b/spec/parser_test.go index 628dd6f..8500a02 100644 --- a/spec/parser_test.go +++ b/spec/parser_test.go @@ -3,6 +3,8 @@ package spec import ( "strings" "testing" + + verr "github.com/nihei9/vartan/error" ) func TestParse(t *testing.T) { @@ -348,9 +350,13 @@ foo: "foo"; t.Run(tt.caption, func(t *testing.T) { ast, err := Parse(strings.NewReader(tt.src)) if tt.synErr != nil { - if tt.synErr != err { + synErr, ok := err.(*verr.SpecError) + if !ok { t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err) } + if tt.synErr != synErr.Cause { + t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause) + } if ast != nil { t.Fatalf("AST must be nil") } diff --git a/spec/syntax_error.go b/spec/syntax_error.go index 59ef62d..041486d 100644 --- a/spec/syntax_error.go +++ b/spec/syntax_error.go @@ -1,7 +1,5 @@ package spec -import "fmt" - type SyntaxError struct { message string } @@ -13,7 +11,7 @@ func newSyntaxError(message string) *SyntaxError { } func (e *SyntaxError) Error() string { - return fmt.Sprintf("syntax error: %s", e.message) + return e.message } var ( |