Fix the text representation of an error token

This commit fixes a bug that caused the second and subsequent characters of the text representation of an error token to be missing.
author: Ryo Nihei <nihei.dev@gmail.com> 2021-05-11 01:20:38 +0900
committer: Ryo Nihei <nihei.dev@gmail.com> 2021-05-11 01:20:38 +0900
commit: 89f56948422dad3ab0336b1d97a1e2b9d6554349 (patch)
tree: a17e305e032a64e8912ce8be90d4ce7f76e89475 /driver
parent: Update README and godoc (diff)
download: tre-89f56948422dad3ab0336b1d97a1e2b9d6554349.tar.gz
tre-89f56948422dad3ab0336b1d97a1e2b9d6554349.tar.xz
2 files changed, 51 insertions, 22 deletions
diff --git a/driver/lexer.go b/driver/lexer.go
index 8a9e04a..bac478f 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -1,6 +1,7 @@
 package driver
 
 import (
+	"encoding/json"
 	"fmt"
 	"io"
 	"io/ioutil"
@@ -56,28 +57,25 @@ func (s byteSequence) merge(a byteSequence) byteSequence {
 // Token representes a token.
 type Token struct {
 	// `Mode` represents a number that corresponds to a `ModeName`.
-	Mode spec.LexModeNum `json:"mode"`
+	Mode spec.LexModeNum
 
 	// `ModeName` is a mode name that represents in which mode the lexer detected the token.
-	ModeName spec.LexModeName `json:"mode_name"`
+	ModeName spec.LexModeName
 
 	// `ID` represents an ID that corresponds to a `Kind`.
-	ID int `json:"id"`
+	ID int
 
 	// `Kind` is a kind name that represents what kind the token has.
-	Kind string `json:"kind"`
-
-	// `Match` is a byte sequence matched a pattern of a lexical specification.
-	Match byteSequence `json:"match"`
-
-	// `Text` is a string representation of the `Match`.
-	Text string `json:"text"`
+	Kind string
 
 	// If `EOF` is true, it means the token is the EOF token.
-	EOF bool `json:"eof"`
+	EOF bool
 
 	// If `Invalid` is true, it means the token is an error token.
-	Invalid bool `json:"invalid"`
+	Invalid bool
+
+	// `match` is the byte sequence that matched a pattern of a lexical specification.
+	match byteSequence
 }
 
 func newToken(mode spec.LexModeNum, modeName spec.LexModeName, id int, kind string, match byteSequence) *Token {
@@ -86,8 +84,7 @@ func newToken(mode spec.LexModeNum, modeName spec.LexModeName, id int, kind stri
 		ModeName: modeName,
 		ID:       id,
 		Kind:     kind,
-		Match:    match,
-		Text:     string(match.ByteSlice()),
+		match:    match,
 	}
 }
 
@@ -105,20 +102,52 @@ func newInvalidToken(mode spec.LexModeNum, modeName spec.LexModeName, match byte
 		Mode:     mode,
 		ModeName: modeName,
 		ID:       0,
-		Match:    match,
-		Text:     string(match.ByteSlice()),
+		match:    match,
 		Invalid:  true,
 	}
 }
 
 func (t *Token) String() string {
 	if t.Invalid {
-		return fmt.Sprintf("!{mode: %v, mode name: %v, text: %v, byte: %v}", t.Mode, t.ModeName, t.Text, t.Match)
+		return fmt.Sprintf("!{mode: %v, mode name: %v, text: %v, byte: %v}", t.Mode, t.ModeName, t.Text(), t.Match())
 	}
 	if t.EOF {
 		return "{eof}"
 	}
-	return fmt.Sprintf("{mode: %v, mode name: %v, id: %v, kind: %v, text: %v, byte: %v}", t.Mode, t.ModeName, t.ID, t.Kind, t.Text, t.Match)
+	return fmt.Sprintf("{mode: %v, mode name: %v, id: %v, kind: %v, text: %v, byte: %v}", t.Mode, t.ModeName, t.ID, t.Kind, t.Text(), t.Match())
+}
+
+// Match returns the byte slice that matched a pattern of a lexical specification.
+func (t *Token) Match() []byte {
+	return t.match.ByteSlice()
+}
+
+// Text returns a string representation of a matched byte sequence.
+func (t *Token) Text() string {
+	return string(t.Match())
+}
+
+func (t *Token) MarshalJSON() ([]byte, error) {
+	m := t.match.ByteSlice()
+	return json.Marshal(struct {
+		Mode     int    `json:"mode"`
+		ModeName string `json:"mode_name"`
+		ID       int    `json:"id"`
+		Kind     string `json:"kind"`
+		Match    []byte `json:"match"`
+		Text     string `json:"text"`
+		EOF      bool   `json:"eof"`
+		Invalid  bool   `json:"invalid"`
+	}{
+		Mode:     t.Mode.Int(),
+		ModeName: t.ModeName.String(),
+		ID:       t.ID,
+		Kind:     t.Kind,
+		Match:    m,
+		Text:     string(m),
+		EOF:      t.EOF,
+		Invalid:  t.Invalid,
+	})
 }
 
 type LexerOption func(l *Lexer) error
@@ -207,7 +236,7 @@ func (l *Lexer) Next() (*Token, error) {
 		if !tok.Invalid {
 			break
 		}
-		errTok.Match = errTok.Match.merge(tok.Match)
+		errTok.match = errTok.match.merge(tok.match)
 		l.logger.Log("  error token: %v", errTok)
 	}
 	l.tokBuf = append(l.tokBuf, tok)
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index e819078..f11a9c0 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -542,7 +542,7 @@ func TestLexer_Next(t *testing.T) {
 					break
 				}
 				testToken(t, eTok, tok)
-				// t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match, string(tok.Match), tok.EOF, tok.Invalid)
+				// t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match(), tok.Text(), tok.EOF, tok.Invalid)
 				if tok.EOF {
 					break
 				}
@@ -558,9 +558,9 @@ func testToken(t *testing.T, expected, actual *Token) {
 		actual.ModeName != actual.ModeName ||
 		actual.ID != expected.ID ||
 		actual.Kind != expected.Kind ||
-		!bytes.Equal(actual.Match, expected.Match) ||
+		!bytes.Equal(actual.Match(), expected.Match()) ||
 		actual.EOF != expected.EOF ||
 		actual.Invalid != expected.Invalid {
-		t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, string(expected.Match), actual, string(actual.Match))
+		t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
 	}
 }
author	Ryo Nihei <nihei.dev@gmail.com>	2021-05-11 01:20:38 +0900
committer	Ryo Nihei <nihei.dev@gmail.com>	2021-05-11 01:20:38 +0900
commit	89f56948422dad3ab0336b1d97a1e2b9d6554349 (patch)
tree	a17e305e032a64e8912ce8be90d4ce7f76e89475 /driver
parent	Update README and godoc (diff)
download	tre-89f56948422dad3ab0336b1d97a1e2b9d6554349.tar.gz tre-89f56948422dad3ab0336b1d97a1e2b9d6554349.tar.xz