author    Ryo Nihei <nihei.dev@gmail.com>  2021-07-16 23:48:48 +0900
committer Ryo Nihei <nihei.dev@gmail.com>  2021-07-16 23:48:48 +0900
commit    a4620e8c1c7ec79beea1b3468af3c969016123b0 (patch)
tree      05ad4a48731c451172068c7fd617df144305178f
parent    Add testing for the driver (diff)
Add a line number to error messages
-rw-r--r--  error/error.go        15
-rw-r--r--  spec/lexer.go         63
-rw-r--r--  spec/lexer_test.go    74
-rw-r--r--  spec/parser.go        47
-rw-r--r--  spec/parser_test.go    8
-rw-r--r--  spec/syntax_error.go   4
6 files changed, 137 insertions, 74 deletions
diff --git a/error/error.go b/error/error.go
new file mode 100644
index 0000000..1745c52
--- /dev/null
+++ b/error/error.go
@@ -0,0 +1,15 @@
+package error
+
+import "fmt"
+
+type SpecError struct {
+ Cause error
+ Row int
+}
+
+func (e *SpecError) Error() string {
+ if e.Row == 0 {
+ return fmt.Sprintf("error: %v", e.Cause)
+ }
+ return fmt.Sprintf("%v: error: %v", e.Row, e.Cause)
+}
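
For reference, a minimal sketch of how the new SpecError composes its message once this commit is applied (the import path is the one parser.go uses below; the cause text is made up for illustration):

    package main

    import (
        "errors"
        "fmt"

        verr "github.com/nihei9/vartan/error"
    )

    func main() {
        cause := errors.New("a production must have a name")

        // Row 0 means "no position information", so the row prefix is omitted.
        fmt.Println(&verr.SpecError{Cause: cause})
        // Output: error: a production must have a name

        // A non-zero row is prepended to the message.
        fmt.Println(&verr.SpecError{Cause: cause, Row: 3})
        // Output: 3: error: a production must have a name
    }
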
diff --git a/spec/lexer.go b/spec/lexer.go
index a1bd6fc..b19a771 100644
--- a/spec/lexer.go
+++ b/spec/lexer.go
@@ -33,36 +33,51 @@ const (
tokenKindInvalid = tokenKind("invalid")
)
+type position struct {
+ row int
+}
+
+func newPosition(row int) position {
+ return position{
+ row: row,
+ }
+}
+
type token struct {
kind tokenKind
text string
num int
+ pos position
}
-func newSymbolToken(kind tokenKind) *token {
+func newSymbolToken(kind tokenKind, pos position) *token {
return &token{
kind: kind,
+ pos: pos,
}
}
-func newIDToken(text string) *token {
+func newIDToken(text string, pos position) *token {
return &token{
kind: tokenKindID,
text: text,
+ pos: pos,
}
}
-func newTerminalPatternToken(text string) *token {
+func newTerminalPatternToken(text string, pos position) *token {
return &token{
kind: tokenKindTerminalPattern,
text: text,
+ pos: pos,
}
}
-func newPositionToken(num int) *token {
+func newPositionToken(num int, pos position) *token {
return &token{
kind: tokenKindPosition,
num: num,
+ pos: pos,
}
}
@@ -83,6 +98,7 @@ type lexer struct {
s *mlspec.CompiledLexSpec
d *mldriver.Lexer
buf *token
+ row int
}
//go:embed clexspec.json
@@ -99,8 +115,9 @@ func newLexer(src io.Reader) (*lexer, error) {
return nil, err
}
return &lexer{
- s: s,
- d: d,
+ s: s,
+ d: d,
+ row: 1,
}, nil
}
@@ -111,20 +128,20 @@ func (l *lexer) next() (*token, error) {
return tok, nil
}
- newline := false
+ var newline *token
for {
tok, err := l.lexAndSkipWSs()
if err != nil {
return nil, err
}
if tok.kind == tokenKindNewline {
- newline = true
+ newline = tok
continue
}
- if newline {
+ if newline != nil {
l.buf = tok
- return newSymbolToken(tokenKindNewline), nil
+ return newline, nil
}
return tok, nil
}
@@ -156,14 +173,16 @@ func (l *lexer) lexAndSkipWSs() (*token, error) {
switch tok.KindName {
case "newline":
- return newSymbolToken(tokenKindNewline), nil
+ row := l.row
+ l.row++
+ return newSymbolToken(tokenKindNewline, newPosition(row)), nil
case "kw_fragment":
- return newSymbolToken(tokenKindKWFragment), nil
+ return newSymbolToken(tokenKindKWFragment, newPosition(l.row)), nil
case "identifier":
if strings.HasPrefix(tok.Text(), "_") {
return nil, synErrAutoGenID
}
- return newIDToken(tok.Text()), nil
+ return newIDToken(tok.Text(), newPosition(l.row)), nil
case "terminal_open":
var b strings.Builder
for {
@@ -181,21 +200,21 @@ func (l *lexer) lexAndSkipWSs() (*token, error) {
case "escape_symbol":
return nil, synErrIncompletedEscSeq
case "terminal_close":
- return newTerminalPatternToken(b.String()), nil
+ return newTerminalPatternToken(b.String(), newPosition(l.row)), nil
}
}
case "colon":
- return newSymbolToken(tokenKindColon), nil
+ return newSymbolToken(tokenKindColon, newPosition(l.row)), nil
case "or":
- return newSymbolToken(tokenKindOr), nil
+ return newSymbolToken(tokenKindOr, newPosition(l.row)), nil
case "semicolon":
- return newSymbolToken(tokenKindSemicolon), nil
+ return newSymbolToken(tokenKindSemicolon, newPosition(l.row)), nil
case "directive_marker":
- return newSymbolToken(tokenKindDirectiveMarker), nil
+ return newSymbolToken(tokenKindDirectiveMarker, newPosition(l.row)), nil
case "tree_node_open":
- return newSymbolToken(tokenKindTreeNodeOpen), nil
+ return newSymbolToken(tokenKindTreeNodeOpen, newPosition(l.row)), nil
case "tree_node_close":
- return newSymbolToken(tokenKindTreeNodeClose), nil
+ return newSymbolToken(tokenKindTreeNodeClose, newPosition(l.row)), nil
case "position":
// Remove '$' character and convert to an integer.
num, err := strconv.Atoi(tok.Text()[1:])
@@ -205,9 +224,9 @@ func (l *lexer) lexAndSkipWSs() (*token, error) {
if num == 0 {
return nil, synErrZeroPos
}
- return newPositionToken(num), nil
+ return newPositionToken(num, newPosition(l.row)), nil
case "expansion":
- return newSymbolToken(tokenKindExpantion), nil
+ return newSymbolToken(tokenKindExpantion, newPosition(l.row)), nil
default:
return newInvalidToken(tok.Text()), nil
}
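
The scheme above: the lexer keeps a 1-based row counter, stamps every token with the row it was read on, and advances the counter only when it emits a newline token (the newline keeps the row it terminates, which is also why next() now buffers and returns the original newline token instead of synthesizing a fresh one). A toy, self-contained sketch of that idea, not the real mldriver-backed lexer:

    package main

    import "fmt"

    type position struct{ row int }

    type token struct {
        kind string
        pos  position
    }

    type lexer struct {
        kinds []string // pre-split toy input standing in for the driver
        row   int
    }

    func (l *lexer) next() token {
        k := l.kinds[0]
        l.kinds = l.kinds[1:]
        // Stamp the token with the current row first, ...
        t := token{kind: k, pos: position{row: l.row}}
        // ... then advance the counter if the token was a newline.
        if k == "newline" {
            l.row++
        }
        return t
    }

    func main() {
        l := &lexer{kinds: []string{"id", "newline", "id"}, row: 1}
        for i := 0; i < 3; i++ {
            t := l.next()
            fmt.Printf("%-7s row %d\n", t.kind, t.pos.row)
        }
        // id      row 1
        // newline row 1
        // id      row 2
    }
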
diff --git a/spec/lexer_test.go b/spec/lexer_test.go
index 0209b59..a5de136 100644
--- a/spec/lexer_test.go
+++ b/spec/lexer_test.go
@@ -6,6 +6,22 @@ import (
)
func TestLexer_Run(t *testing.T) {
+ idTok := func(text string) *token {
+ return newIDToken(text, newPosition(1))
+ }
+
+ termPatTok := func(text string) *token {
+ return newTerminalPatternToken(text, newPosition(1))
+ }
+
+ symTok := func(kind tokenKind) *token {
+ return newSymbolToken(kind, newPosition(1))
+ }
+
+ posTok := func(num int) *token {
+ return newPositionToken(num, newPosition(1))
+ }
+
tests := []struct {
caption string
src string
@@ -16,16 +32,16 @@ func TestLexer_Run(t *testing.T) {
caption: "the lexer can recognize all kinds of tokens",
src: `id"terminal":|;#'()$1...`,
tokens: []*token{
- newIDToken("id"),
- newTerminalPatternToken("terminal"),
- newSymbolToken(tokenKindColon),
- newSymbolToken(tokenKindOr),
- newSymbolToken(tokenKindSemicolon),
- newSymbolToken(tokenKindDirectiveMarker),
- newSymbolToken(tokenKindTreeNodeOpen),
- newSymbolToken(tokenKindTreeNodeClose),
- newPositionToken(1),
- newSymbolToken(tokenKindExpantion),
+ idTok("id"),
+ termPatTok("terminal"),
+ symTok(tokenKindColon),
+ symTok(tokenKindOr),
+ symTok(tokenKindSemicolon),
+ symTok(tokenKindDirectiveMarker),
+ symTok(tokenKindTreeNodeOpen),
+ symTok(tokenKindTreeNodeClose),
+ posTok(1),
+ symTok(tokenKindExpantion),
newEOFToken(),
},
},
@@ -33,7 +49,7 @@ func TestLexer_Run(t *testing.T) {
caption: "the lexer can recognize keywords",
src: `fragment`,
tokens: []*token{
- newSymbolToken(tokenKindKWFragment),
+ symTok(tokenKindKWFragment),
newEOFToken(),
},
},
@@ -41,7 +57,7 @@ func TestLexer_Run(t *testing.T) {
caption: "the lexer can recognize character sequences and escape sequences in terminal",
src: `"abc\"\\"`,
tokens: []*token{
- newTerminalPatternToken(`abc"\\`),
+ termPatTok(`abc"\\`),
newEOFToken(),
},
},
@@ -49,13 +65,13 @@ func TestLexer_Run(t *testing.T) {
caption: "the lexer can recognize newlines and combine consecutive newlines into one",
src: "\u000A | \u000D | \u000D\u000A | \u000A\u000A \u000D\u000D \u000D\u000A\u000D\u000A",
tokens: []*token{
- newSymbolToken(tokenKindNewline),
- newSymbolToken(tokenKindOr),
- newSymbolToken(tokenKindNewline),
- newSymbolToken(tokenKindOr),
- newSymbolToken(tokenKindNewline),
- newSymbolToken(tokenKindOr),
- newSymbolToken(tokenKindNewline),
+ symTok(tokenKindNewline),
+ symTok(tokenKindOr),
+ symTok(tokenKindNewline),
+ symTok(tokenKindOr),
+ symTok(tokenKindNewline),
+ symTok(tokenKindOr),
+ symTok(tokenKindNewline),
newEOFToken(),
},
},
@@ -69,11 +85,11 @@ foo
bar // This is the fourth comment.
`,
tokens: []*token{
- newSymbolToken(tokenKindNewline),
- newIDToken("foo"),
- newSymbolToken(tokenKindNewline),
- newIDToken("bar"),
- newSymbolToken(tokenKindNewline),
+ symTok(tokenKindNewline),
+ idTok("foo"),
+ symTok(tokenKindNewline),
+ idTok("bar"),
+ symTok(tokenKindNewline),
newEOFToken(),
},
},
@@ -101,9 +117,9 @@ bar // This is the fourth comment.
caption: "the lexer can recognize valid tokens following an invalid token",
src: `abc!!!def`,
tokens: []*token{
- newIDToken("abc"),
+ idTok("abc"),
newInvalidToken("!!!"),
- newIDToken("def"),
+ idTok("def"),
newEOFToken(),
},
},
@@ -113,9 +129,9 @@ bar // This is the fourth comment.
// \u0020: SP
src: "a\u0009b\u0020c",
tokens: []*token{
- newIDToken("a"),
- newIDToken("b"),
- newIDToken("c"),
+ idTok("a"),
+ idTok("b"),
+ idTok("c"),
newEOFToken(),
},
},
diff --git a/spec/parser.go b/spec/parser.go
index da17498..1bd2fb4 100644
--- a/spec/parser.go
+++ b/spec/parser.go
@@ -2,6 +2,8 @@ package spec
import (
"io"
+
+ verr "github.com/nihei9/vartan/error"
)
type RootNode struct {
@@ -50,8 +52,11 @@ type FragmentNode struct {
RHS string
}
-func raiseSyntaxError(synErr *SyntaxError) {
- panic(synErr)
+func raiseSyntaxError(row int, synErr *SyntaxError) {
+ panic(&verr.SpecError{
+ Cause: synErr,
+ Row: row,
+ })
}
func Parse(src io.Reader) (*RootNode, error) {
@@ -70,6 +75,10 @@ type parser struct {
lex *lexer
peekedTok *token
lastTok *token
+
+ // The position of the token that the parser read last.
+ // It is used as additional information in error messages.
+ pos position
}
func newParser(src io.Reader) (*parser, error) {
@@ -114,7 +123,7 @@ func (p *parser) parseRoot() *RootNode {
break
}
if len(prods) == 0 {
- raiseSyntaxError(synErrNoProduction)
+ raiseSyntaxError(0, synErrNoProduction)
}
return &RootNode{
@@ -131,30 +140,30 @@ func (p *parser) parseFragment() *FragmentNode {
p.consume(tokenKindNewline)
if !p.consume(tokenKindID) {
- raiseSyntaxError(synErrNoProductionName)
+ raiseSyntaxError(p.pos.row, synErrNoProductionName)
}
lhs := p.lastTok.text
p.consume(tokenKindNewline)
if !p.consume(tokenKindColon) {
- raiseSyntaxError(synErrNoColon)
+ raiseSyntaxError(p.pos.row, synErrNoColon)
}
if !p.consume(tokenKindTerminalPattern) {
- raiseSyntaxError(synErrFragmentNoPattern)
+ raiseSyntaxError(p.pos.row, synErrFragmentNoPattern)
}
rhs := p.lastTok.text
p.consume(tokenKindNewline)
if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(synErrNoSemicolon)
+ raiseSyntaxError(p.pos.row, synErrNoSemicolon)
}
if !p.consume(tokenKindNewline) {
if !p.consume(tokenKindEOF) {
- raiseSyntaxError(synErrSemicolonNoNewline)
+ raiseSyntaxError(p.pos.row, synErrSemicolonNoNewline)
}
}
@@ -172,19 +181,19 @@ func (p *parser) parseProduction() *ProductionNode {
dir := p.parseDirective()
if dir != nil {
if !p.consume(tokenKindNewline) {
- raiseSyntaxError(synErrProdDirNoNewline)
+ raiseSyntaxError(p.pos.row, synErrProdDirNoNewline)
}
}
if !p.consume(tokenKindID) {
- raiseSyntaxError(synErrNoProductionName)
+ raiseSyntaxError(p.pos.row, synErrNoProductionName)
}
lhs := p.lastTok.text
p.consume(tokenKindNewline)
if !p.consume(tokenKindColon) {
- raiseSyntaxError(synErrNoColon)
+ raiseSyntaxError(p.pos.row, synErrNoColon)
}
alt := p.parseAlternative()
@@ -202,12 +211,12 @@ func (p *parser) parseProduction() *ProductionNode {
p.consume(tokenKindNewline)
if !p.consume(tokenKindSemicolon) {
- raiseSyntaxError(synErrNoSemicolon)
+ raiseSyntaxError(p.pos.row, synErrNoSemicolon)
}
if !p.consume(tokenKindNewline) {
if !p.consume(tokenKindEOF) {
- raiseSyntaxError(synErrSemicolonNoNewline)
+ raiseSyntaxError(p.pos.row, synErrSemicolonNoNewline)
}
}
@@ -256,7 +265,7 @@ func (p *parser) parseDirective() *DirectiveNode {
}
if !p.consume(tokenKindID) {
- raiseSyntaxError(synErrNoDirectiveName)
+ raiseSyntaxError(p.pos.row, synErrNoDirectiveName)
}
name := p.lastTok.text
@@ -283,7 +292,7 @@ func (p *parser) parseParameter() *ParameterNode {
}
case p.consume(tokenKindTreeNodeOpen):
if !p.consume(tokenKindID) {
- raiseSyntaxError(synErrTreeInvalidFirstElem)
+ raiseSyntaxError(p.pos.row, synErrTreeInvalidFirstElem)
}
name := p.lastTok.text
@@ -304,7 +313,7 @@ func (p *parser) parseParameter() *ParameterNode {
}
if !p.consume(tokenKindTreeNodeClose) {
- raiseSyntaxError(synErrTreeUnclosed)
+ raiseSyntaxError(p.pos.row, synErrTreeUnclosed)
}
return &ParameterNode{
@@ -330,15 +339,15 @@ func (p *parser) consume(expected tokenKind) bool {
panic(err)
}
}
- p.lastTok = tok
+ p.pos = tok.pos
if tok.kind == tokenKindInvalid {
- raiseSyntaxError(synErrInvalidToken)
+ raiseSyntaxError(p.pos.row, synErrInvalidToken)
}
if tok.kind == expected {
+ p.lastTok = tok
return true
}
p.peekedTok = tok
- p.lastTok = nil
return false
}
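
raiseSyntaxError now panics with a *verr.SpecError carrying the row of the last-read token; Parse, whose body is outside this diff's context, presumably recovers that panic and returns it as an ordinary error. A self-contained sketch of that recover pattern, with a local stand-in for raiseSyntaxError mirroring the version above:

    package main

    import (
        "errors"
        "fmt"

        verr "github.com/nihei9/vartan/error"
    )

    // Stand-in mirroring the raiseSyntaxError in the diff.
    func raiseSyntaxError(row int, cause error) {
        panic(&verr.SpecError{Cause: cause, Row: row})
    }

    // parse shows the recover side: deep parser code panics with a
    // *verr.SpecError, and the entry point turns it back into a return value.
    func parse() (err error) {
        defer func() {
            if v := recover(); v != nil {
                specErr, ok := v.(*verr.SpecError)
                if !ok {
                    panic(v) // re-raise anything that is not a spec error
                }
                err = specErr
            }
        }()
        raiseSyntaxError(7, errors.New("missing colon"))
        return nil
    }

    func main() {
        fmt.Println(parse()) // 7: error: missing colon
    }
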
diff --git a/spec/parser_test.go b/spec/parser_test.go
index 628dd6f..8500a02 100644
--- a/spec/parser_test.go
+++ b/spec/parser_test.go
@@ -3,6 +3,8 @@ package spec
import (
"strings"
"testing"
+
+ verr "github.com/nihei9/vartan/error"
)
func TestParse(t *testing.T) {
@@ -348,9 +350,13 @@ foo: "foo";
t.Run(tt.caption, func(t *testing.T) {
ast, err := Parse(strings.NewReader(tt.src))
if tt.synErr != nil {
- if tt.synErr != err {
+ synErr, ok := err.(*verr.SpecError)
+ if !ok {
t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err)
}
+ if tt.synErr != synErr.Cause {
+ t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, synErr.Cause)
+ }
if ast != nil {
t.Fatalf("AST must be nil")
}
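
A note on the assertion style: the test unwraps with a direct type assertion and then compares tt.synErr against synErr.Cause by identity, which works because the synErr* values are package-level singletons (see the var block in syntax_error.go below). An equivalent check with the standard errors package, assuming nothing beyond what this diff shows, would be:

    var specErr *verr.SpecError
    if !errors.As(err, &specErr) || specErr.Cause != tt.synErr {
        t.Fatalf("unexpected error; want: %v, got: %v", tt.synErr, err)
    }

errors.Is(err, tt.synErr) would additionally require SpecError to implement Unwrap() error, which this commit does not add.
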
diff --git a/spec/syntax_error.go b/spec/syntax_error.go
index 59ef62d..041486d 100644
--- a/spec/syntax_error.go
+++ b/spec/syntax_error.go
@@ -1,7 +1,5 @@
package spec
-import "fmt"
-
type SyntaxError struct {
message string
}
@@ -13,7 +11,7 @@ func newSyntaxError(message string) *SyntaxError {
}
func (e *SyntaxError) Error() string {
- return fmt.Sprintf("syntax error: %s", e.message)
+ return e.message
}
var (