-rw-r--r-- | cli/cmd/lex.go | 11
-rw-r--r-- | driver/lexer.go | 86
-rw-r--r-- | driver/lexer_test.go | 228
3 files changed, 185 insertions, 140 deletions
diff --git a/cli/cmd/lex.go b/cli/cmd/lex.go
index 7efc814..14fbc01 100644
--- a/cli/cmd/lex.go
+++ b/cli/cmd/lex.go
@@ -74,15 +74,14 @@ Date time: %v
         if err != nil {
             return err
         }
+        data, err := json.Marshal(tok)
+        if err != nil {
+            fmt.Fprintf(os.Stderr, "failed to marshal a token; token: %v, error: %v\n", tok, err)
+        }
+        fmt.Fprintf(os.Stdout, "%v\n", string(data))
         if tok.EOF {
             break
         }
-        if tok.Invalid {
-            fmt.Fprintf(os.Stdout, "-: -: ")
-        } else {
-            fmt.Fprintf(os.Stdout, "%v: %v: ", tok.ID, clspec.Kinds[tok.ID])
-        }
-        fmt.Fprintf(os.Stdout, "\"%v\"\n", string(tok.Match))
     }
 
     return nil
diff --git a/driver/lexer.go b/driver/lexer.go
index 750fec1..4a3c3cc 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -4,37 +4,73 @@ import (
     "fmt"
     "io"
     "io/ioutil"
+    "strings"
 
     "github.com/nihei9/maleeni/log"
     "github.com/nihei9/maleeni/spec"
 )
 
+type byteSequence []byte
+
+func newByteSequence(b []byte) byteSequence {
+    return byteSequence(b)
+}
+
+func (s byteSequence) ByteSlice() []byte {
+    return []byte(s)
+}
+
+func (s byteSequence) String() string {
+    if len(s) <= 0 {
+        return ""
+    }
+    var b strings.Builder
+    fmt.Fprintf(&b, "%X", s[0])
+    for _, d := range s[1:] {
+        fmt.Fprintf(&b, " %X", d)
+    }
+    return b.String()
+}
+
+func (s byteSequence) GoString() string {
+    return fmt.Sprintf("\"%v\"", s.String())
+}
+
+func (s byteSequence) MarshalJSON() ([]byte, error) {
+    if len(s) <= 0 {
+        return []byte("[]"), nil
+    }
+    var b strings.Builder
+    fmt.Fprintf(&b, "[%v", uint8(s[0]))
+    for _, e := range s[1:] {
+        fmt.Fprintf(&b, ", %v", uint8(e))
+    }
+    fmt.Fprintf(&b, "]")
+    return []byte(b.String()), nil
+}
+
+func (s byteSequence) merge(a byteSequence) byteSequence {
+    return append([]byte(s), []byte(a)...)
+}
+
 type Token struct {
-    ID      int
-    Kind    string
-    Match   []byte
-    EOF     bool
-    Invalid bool
+    ID      int          `json:"id"`
+    Kind    string       `json:"kind"`
+    Match   byteSequence `json:"match"`
+    Text    string       `json:"text"`
+    EOF     bool         `json:"eof"`
+    Invalid bool         `json:"invalid"`
 }
 
-func newToken(id int, kind string, match []byte) *Token {
+func newToken(id int, kind string, match byteSequence) *Token {
     return &Token{
         ID:    id,
         Kind:  kind,
         Match: match,
+        Text:  string(match.ByteSlice()),
     }
 }
 
-func (t *Token) String() string {
-    if t.Invalid {
-        return fmt.Sprintf("!{text: %v, byte: %v}", string(t.Match), t.Match)
-    }
-    if t.EOF {
-        return "{eof}"
-    }
-    return fmt.Sprintf("{id: %v, kind: %v, text: %v, byte: %v}", t.ID, t.Kind, string(t.Match), t.Match)
-}
-
 func newEOFToken() *Token {
     return &Token{
         ID:  0,
@@ -42,7 +78,7 @@ func newEOFToken() *Token {
     }
 }
 
-func newInvalidToken(match []byte) *Token {
+func newInvalidToken(match byteSequence) *Token {
     return &Token{
         ID:      0,
         Match:   match,
@@ -50,6 +86,16 @@
     }
 }
 
+func (t *Token) String() string {
+    if t.Invalid {
+        return fmt.Sprintf("!{text: %v, byte: %v}", t.Text, t.Match)
+    }
+    if t.EOF {
+        return "{eof}"
+    }
+    return fmt.Sprintf("{id: %v, kind: %v, text: %v, byte: %v}", t.ID, t.Kind, t.Text, t.Match)
+}
+
 type lexerOption func(l *lexer) error
 
 func EnableLogging(w io.Writer) lexerOption {
@@ -131,7 +177,7 @@ func (l *lexer) Next() (*Token, error) {
             if !tok.Invalid {
                 break
             }
-            errTok.Match = append(errTok.Match, tok.Match...)
+            errTok.Match = errTok.Match.merge(tok.Match)
             l.logger.Log(" error token: %v", errTok)
         }
         l.tokBuf = append(l.tokBuf, tok)
@@ -194,12 +240,12 @@ func (l *lexer) next() (*Token, error) {
                 l.unread(unfixedBufLen)
                 return tok, nil
             }
-            return newInvalidToken(buf), nil
+            return newInvalidToken(newByteSequence(buf)), nil
         }
         state = nextState
         id, ok := l.clspec.DFA.AcceptingStates[state]
         if ok {
-            tok = newToken(id, l.clspec.Kinds[id], buf)
+            tok = newToken(id, l.clspec.Kinds[id], newByteSequence(buf))
             unfixedBufLen = 0
         }
     }
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 0c9f720..e48c12f 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -24,17 +24,17 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "abb aabb aaabb babb bbabb abbbabb",
            tokens: []*Token{
-               newToken(1, "t1", []byte("abb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("aabb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("aaabb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("babb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("bbabb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("abbbabb")),
+               newToken(1, "t1", newByteSequence([]byte("abb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("aabb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("aaabb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("babb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("bbabb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("abbbabb"))),
                newEOFToken(),
            },
        },
@@ -48,21 +48,21 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
            tokens: []*Token{
-               newToken(1, "t1", []byte("ba")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(1, "t1", []byte("baaa")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(1, "t1", []byte("a")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(1, "t1", []byte("aaa")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("abcd")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("abcdcdcd")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("cd")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("cdcdcd")),
+               newToken(1, "t1", newByteSequence([]byte("ba"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("baaa"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("a"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("aaa"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("abcd"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("abcdcdcd"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("cd"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("cdcdcd"))),
                newEOFToken(),
            },
        },
@@ -91,22 +91,22 @@ func TestLexer_Next(t *testing.T) {
                0xf4, 0x8f, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "t1", []byte{0x00}),
-               newToken(1, "t1", []byte{0x7f}),
-               newToken(1, "t1", []byte{0xc2, 0x80}),
-               newToken(1, "t1", []byte{0xdf, 0xbf}),
-               newToken(1, "t1", []byte{0xe1, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xed, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}),
-               newToken(1, "t1", []byte{0xee, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+               newToken(1, "t1", newByteSequence([]byte{0x00})),
+               newToken(1, "t1", newByteSequence([]byte{0x7f})),
+               newToken(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -118,17 +118,17 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "ab.*+?|()[]",
            tokens: []*Token{
-               newToken(1, "t1", []byte("a")),
-               newToken(1, "t1", []byte("b")),
-               newToken(1, "t1", []byte(".")),
-               newToken(1, "t1", []byte("*")),
-               newToken(1, "t1", []byte("+")),
-               newToken(1, "t1", []byte("?")),
-               newToken(1, "t1", []byte("|")),
-               newToken(1, "t1", []byte("(")),
-               newToken(1, "t1", []byte(")")),
-               newToken(1, "t1", []byte("[")),
-               newToken(1, "t1", []byte("]")),
+               newToken(1, "t1", newByteSequence([]byte("a"))),
+               newToken(1, "t1", newByteSequence([]byte("b"))),
+               newToken(1, "t1", newByteSequence([]byte("."))),
+               newToken(1, "t1", newByteSequence([]byte("*"))),
+               newToken(1, "t1", newByteSequence([]byte("+"))),
+               newToken(1, "t1", newByteSequence([]byte("?"))),
+               newToken(1, "t1", newByteSequence([]byte("|"))),
+               newToken(1, "t1", newByteSequence([]byte("("))),
+               newToken(1, "t1", newByteSequence([]byte(")"))),
+               newToken(1, "t1", newByteSequence([]byte("["))),
+               newToken(1, "t1", newByteSequence([]byte("]"))),
                newEOFToken(),
            },
        },
@@ -151,10 +151,10 @@ func TestLexer_Next(t *testing.T) {
                0x7f,
            }),
            tokens: []*Token{
-               newToken(1, "1ByteChar", []byte{0x01}),
-               newToken(1, "1ByteChar", []byte{0x02}),
-               newToken(1, "1ByteChar", []byte{0x7e}),
-               newToken(1, "1ByteChar", []byte{0x7f}),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x01})),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x02})),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x7e})),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x7f})),
                newEOFToken(),
            },
        },
@@ -172,10 +172,10 @@ func TestLexer_Next(t *testing.T) {
                0xdf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "2ByteChar", []byte{0xc2, 0x80}),
-               newToken(1, "2ByteChar", []byte{0xc2, 0x81}),
-               newToken(1, "2ByteChar", []byte{0xdf, 0xbe}),
-               newToken(1, "2ByteChar", []byte{0xdf, 0xbf}),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -190,7 +190,7 @@ func TestLexer_Next(t *testing.T) {
                0xe0, 0xa0, 0x80,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
                newEOFToken(),
            },
        },
@@ -208,10 +208,10 @@ func TestLexer_Next(t *testing.T) {
                0xe0, 0xa0, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
                newEOFToken(),
            },
        },
@@ -229,10 +229,10 @@ func TestLexer_Next(t *testing.T) {
                0xe0, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -262,22 +262,22 @@ func TestLexer_Next(t *testing.T) {
                0xef, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
-               newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
-               newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -292,7 +292,7 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0x90, 0x80, 0x80,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
                newEOFToken(),
            },
        },
@@ -310,10 +310,10 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0x90, 0x80, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
                newEOFToken(),
            },
        },
@@ -331,10 +331,10 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0x90, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -352,10 +352,10 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0xbf, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -381,18 +381,18 @@ func TestLexer_Next(t *testing.T) {
                0xf4, 0x8f, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -404,7 +404,7 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "foo9",
            tokens: []*Token{
-               newToken(1, "NonNumber", []byte("foo9")),
+               newToken(1, "NonNumber", newByteSequence([]byte("foo9"))),
                newEOFToken(),
            },
        },
@@ -451,11 +451,11 @@ func TestLexer_PeekN(t *testing.T) {
    expectedTokens := []*Token{
        {
            ID:    1,
-           Match: []byte("foo"),
+           Match: newByteSequence([]byte("foo")),
        },
        {
            ID:    2,
-           Match: []byte("bar"),
+           Match: newByteSequence([]byte("bar")),
        },
        {
            EOF: true,
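As a quick illustration of the new output format (a minimal sketch, not part of this commit; it assumes it lives in a test file inside the driver package, since newToken, newByteSequence, and byteSequence are unexported, and the test name is hypothetical), marshaling a token the way cli/cmd/lex.go now does yields one JSON object per token, with the match bytes rendered as a numeric array by byteSequence.MarshalJSON and the decoded text carried in the new Text field:

package driver

import (
	"encoding/json"
	"fmt"
	"testing"
)

// TestTokenJSONSketch is an illustrative sketch, not part of the change.
// The expected string in the comment is an assumption derived from the
// struct tags and the MarshalJSON implementation added in this commit.
func TestTokenJSONSketch(t *testing.T) {
	tok := newToken(1, "t1", newByteSequence([]byte("abb")))
	data, err := json.Marshal(tok)
	if err != nil {
		t.Fatal(err)
	}
	fmt.Println(string(data))
	// Expected to print something like:
	// {"id":1,"kind":"t1","match":[97,98,98],"text":"abb","eof":false,"invalid":false}
}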