-rw-r--r-- | cli/cmd/lex.go | 11
-rw-r--r-- | driver/lexer.go | 86
-rw-r--r-- | driver/lexer_test.go | 228
3 files changed, 185 insertions, 140 deletions
diff --git a/cli/cmd/lex.go b/cli/cmd/lex.go
index 7efc814..14fbc01 100644
--- a/cli/cmd/lex.go
+++ b/cli/cmd/lex.go
@@ -74,15 +74,14 @@ Date time: %v
         if err != nil {
             return err
         }
+        data, err := json.Marshal(tok)
+        if err != nil {
+            fmt.Fprintf(os.Stderr, "failed to marshal a token; token: %v, error: %v\n", tok, err)
+        }
+        fmt.Fprintf(os.Stdout, "%v\n", string(data))
         if tok.EOF {
             break
         }
-        if tok.Invalid {
-            fmt.Fprintf(os.Stdout, "-: -: ")
-        } else {
-            fmt.Fprintf(os.Stdout, "%v: %v: ", tok.ID, clspec.Kinds[tok.ID])
-        }
-        fmt.Fprintf(os.Stdout, "\"%v\"\n", string(tok.Match))
     }
 
     return nil
diff --git a/driver/lexer.go b/driver/lexer.go
index 750fec1..4a3c3cc 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -4,37 +4,73 @@ import (
     "fmt"
     "io"
     "io/ioutil"
+    "strings"
 
     "github.com/nihei9/maleeni/log"
     "github.com/nihei9/maleeni/spec"
 )
 
+type byteSequence []byte
+
+func newByteSequence(b []byte) byteSequence {
+    return byteSequence(b)
+}
+
+func (s byteSequence) ByteSlice() []byte {
+    return []byte(s)
+}
+
+func (s byteSequence) String() string {
+    if len(s) <= 0 {
+        return ""
+    }
+    var b strings.Builder
+    fmt.Fprintf(&b, "%X", s[0])
+    for _, d := range s[1:] {
+        fmt.Fprintf(&b, " %X", d)
+    }
+    return b.String()
+}
+
+func (s byteSequence) GoString() string {
+    return fmt.Sprintf("\"%v\"", s.String())
+}
+
+func (s byteSequence) MarshalJSON() ([]byte, error) {
+    if len(s) <= 0 {
+        return []byte("[]"), nil
+    }
+    var b strings.Builder
+    fmt.Fprintf(&b, "[%v", uint8(s[0]))
+    for _, e := range s[1:] {
+        fmt.Fprintf(&b, ", %v", uint8(e))
+    }
+    fmt.Fprintf(&b, "]")
+    return []byte(b.String()), nil
+}
+
+func (s byteSequence) merge(a byteSequence) byteSequence {
+    return append([]byte(s), []byte(a)...)
+}
+
 type Token struct {
-    ID      int
-    Kind    string
-    Match   []byte
-    EOF     bool
-    Invalid bool
+    ID      int          `json:"id"`
+    Kind    string       `json:"kind"`
+    Match   byteSequence `json:"match"`
+    Text    string       `json:"text"`
+    EOF     bool         `json:"eof"`
+    Invalid bool         `json:"invalid"`
 }
 
-func newToken(id int, kind string, match []byte) *Token {
+func newToken(id int, kind string, match byteSequence) *Token {
     return &Token{
         ID:    id,
         Kind:  kind,
         Match: match,
+        Text:  string(match.ByteSlice()),
     }
 }
 
-func (t *Token) String() string {
-    if t.Invalid {
-        return fmt.Sprintf("!{text: %v, byte: %v}", string(t.Match), t.Match)
-    }
-    if t.EOF {
-        return "{eof}"
-    }
-    return fmt.Sprintf("{id: %v, kind: %v, text: %v, byte: %v}", t.ID, t.Kind, string(t.Match), t.Match)
-}
-
 func newEOFToken() *Token {
     return &Token{
         ID:  0,
@@ -42,7 +78,7 @@ func newEOFToken() *Token {
     }
 }
 
-func newInvalidToken(match []byte) *Token {
+func newInvalidToken(match byteSequence) *Token {
     return &Token{
         ID:      0,
         Match:   match,
@@ -50,6 +86,16 @@
     }
 }
 
+func (t *Token) String() string {
+    if t.Invalid {
+        return fmt.Sprintf("!{text: %v, byte: %v}", t.Text, t.Match)
+    }
+    if t.EOF {
+        return "{eof}"
+    }
+    return fmt.Sprintf("{id: %v, kind: %v, text: %v, byte: %v}", t.ID, t.Kind, t.Text, t.Match)
+}
+
 type lexerOption func(l *lexer) error
 
 func EnableLogging(w io.Writer) lexerOption {
@@ -131,7 +177,7 @@ func (l *lexer) Next() (*Token, error) {
             if !tok.Invalid {
                 break
             }
-            errTok.Match = append(errTok.Match, tok.Match...)
+            errTok.Match = errTok.Match.merge(tok.Match)
             l.logger.Log(" error token: %v", errTok)
         }
         l.tokBuf = append(l.tokBuf, tok)
@@ -194,12 +240,12 @@ func (l *lexer) next() (*Token, error) {
                 l.unread(unfixedBufLen)
                 return tok, nil
             }
-            return newInvalidToken(buf), nil
+            return newInvalidToken(newByteSequence(buf)), nil
         }
         state = nextState
         id, ok := l.clspec.DFA.AcceptingStates[state]
         if ok {
-            tok = newToken(id, l.clspec.Kinds[id], buf)
+            tok = newToken(id, l.clspec.Kinds[id], newByteSequence(buf))
             unfixedBufLen = 0
         }
     }
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 0c9f720..e48c12f 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -24,17 +24,17 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "abb aabb aaabb babb bbabb abbbabb",
            tokens: []*Token{
-               newToken(1, "t1", []byte("abb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("aabb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("aaabb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("babb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("bbabb")),
-               newToken(2, "t2", []byte(" ")),
-               newToken(1, "t1", []byte("abbbabb")),
+               newToken(1, "t1", newByteSequence([]byte("abb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("aabb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("aaabb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("babb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("bbabb"))),
+               newToken(2, "t2", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("abbbabb"))),
                newEOFToken(),
            },
        },
@@ -48,21 +48,21 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
            tokens: []*Token{
-               newToken(1, "t1", []byte("ba")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(1, "t1", []byte("baaa")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(1, "t1", []byte("a")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(1, "t1", []byte("aaa")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("abcd")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("abcdcdcd")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("cd")),
-               newToken(3, "t3", []byte(" ")),
-               newToken(2, "t2", []byte("cdcdcd")),
+               newToken(1, "t1", newByteSequence([]byte("ba"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("baaa"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("a"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(1, "t1", newByteSequence([]byte("aaa"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("abcd"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("abcdcdcd"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("cd"))),
+               newToken(3, "t3", newByteSequence([]byte(" "))),
+               newToken(2, "t2", newByteSequence([]byte("cdcdcd"))),
                newEOFToken(),
            },
        },
@@ -91,22 +91,22 @@ func TestLexer_Next(t *testing.T) {
                0xf4, 0x8f, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "t1", []byte{0x00}),
-               newToken(1, "t1", []byte{0x7f}),
-               newToken(1, "t1", []byte{0xc2, 0x80}),
-               newToken(1, "t1", []byte{0xdf, 0xbf}),
-               newToken(1, "t1", []byte{0xe1, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xed, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}),
-               newToken(1, "t1", []byte{0xee, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
-               newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+               newToken(1, "t1", newByteSequence([]byte{0x00})),
+               newToken(1, "t1", newByteSequence([]byte{0x7f})),
+               newToken(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+               newToken(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -118,17 +118,17 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "ab.*+?|()[]",
            tokens: []*Token{
-               newToken(1, "t1", []byte("a")),
-               newToken(1, "t1", []byte("b")),
-               newToken(1, "t1", []byte(".")),
-               newToken(1, "t1", []byte("*")),
-               newToken(1, "t1", []byte("+")),
-               newToken(1, "t1", []byte("?")),
-               newToken(1, "t1", []byte("|")),
-               newToken(1, "t1", []byte("(")),
-               newToken(1, "t1", []byte(")")),
-               newToken(1, "t1", []byte("[")),
-               newToken(1, "t1", []byte("]")),
+               newToken(1, "t1", newByteSequence([]byte("a"))),
+               newToken(1, "t1", newByteSequence([]byte("b"))),
+               newToken(1, "t1", newByteSequence([]byte("."))),
+               newToken(1, "t1", newByteSequence([]byte("*"))),
+               newToken(1, "t1", newByteSequence([]byte("+"))),
+               newToken(1, "t1", newByteSequence([]byte("?"))),
+               newToken(1, "t1", newByteSequence([]byte("|"))),
+               newToken(1, "t1", newByteSequence([]byte("("))),
+               newToken(1, "t1", newByteSequence([]byte(")"))),
+               newToken(1, "t1", newByteSequence([]byte("["))),
+               newToken(1, "t1", newByteSequence([]byte("]"))),
                newEOFToken(),
            },
        },
@@ -151,10 +151,10 @@ func TestLexer_Next(t *testing.T) {
                0x7f,
            }),
            tokens: []*Token{
-               newToken(1, "1ByteChar", []byte{0x01}),
-               newToken(1, "1ByteChar", []byte{0x02}),
-               newToken(1, "1ByteChar", []byte{0x7e}),
-               newToken(1, "1ByteChar", []byte{0x7f}),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x01})),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x02})),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x7e})),
+               newToken(1, "1ByteChar", newByteSequence([]byte{0x7f})),
                newEOFToken(),
            },
        },
@@ -172,10 +172,10 @@ func TestLexer_Next(t *testing.T) {
                0xdf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "2ByteChar", []byte{0xc2, 0x80}),
-               newToken(1, "2ByteChar", []byte{0xc2, 0x81}),
-               newToken(1, "2ByteChar", []byte{0xdf, 0xbe}),
-               newToken(1, "2ByteChar", []byte{0xdf, 0xbf}),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+               newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -190,7 +190,7 @@ func TestLexer_Next(t *testing.T) {
                0xe0, 0xa0, 0x80,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
                newEOFToken(),
            },
        },
@@ -208,10 +208,10 @@ func TestLexer_Next(t *testing.T) {
                0xe0, 0xa0, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
                newEOFToken(),
            },
        },
@@ -229,10 +229,10 @@ func TestLexer_Next(t *testing.T) {
                0xe0, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -262,22 +262,22 @@ func TestLexer_Next(t *testing.T) {
                0xef, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
-               newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
-               newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
-               newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
-               newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
-               newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+               newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -292,7 +292,7 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0x90, 0x80, 0x80,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
                newEOFToken(),
            },
        },
@@ -310,10 +310,10 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0x90, 0x80, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
                newEOFToken(),
            },
        },
@@ -331,10 +331,10 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0x90, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -352,10 +352,10 @@ func TestLexer_Next(t *testing.T) {
                0xf0, 0xbf, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -381,18 +381,18 @@ func TestLexer_Next(t *testing.T) {
                0xf4, 0x8f, 0xbf, 0xbf,
            }),
            tokens: []*Token{
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
-               newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+               newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
                newEOFToken(),
            },
        },
@@ -404,7 +404,7 @@ func TestLexer_Next(t *testing.T) {
            },
            src: "foo9",
            tokens: []*Token{
-               newToken(1, "NonNumber", []byte("foo9")),
+               newToken(1, "NonNumber", newByteSequence([]byte("foo9"))),
                newEOFToken(),
            },
        },
@@ -451,11 +451,11 @@ func TestLexer_PeekN(t *testing.T) {
    expectedTokens := []*Token{
        {
            ID:    1,
-           Match: []byte("foo"),
+           Match: newByteSequence([]byte("foo")),
        },
        {
            ID:    2,
-           Match: []byte("bar"),
+           Match: newByteSequence([]byte("bar")),
        },
        {
            EOF: true,
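As a quick illustration of the new output format (a minimal sketch, not part of this commit; it assumes it lives in a test file inside the driver package, since newToken, newByteSequence, and byteSequence are unexported, and the test name is hypothetical), marshaling a token the way cli/cmd/lex.go now does yields one JSON object per token, with the match bytes rendered as a numeric array by byteSequence.MarshalJSON and the decoded text carried in the new Text field:

package driver

import (
	"encoding/json"
	"fmt"
	"testing"
)

// TestTokenJSONSketch is an illustrative sketch, not part of the change.
// The expected string in the comment is an assumption derived from the
// struct tags and the MarshalJSON implementation added in this commit.
func TestTokenJSONSketch(t *testing.T) {
	tok := newToken(1, "t1", newByteSequence([]byte("abb")))
	data, err := json.Marshal(tok)
	if err != nil {
		t.Fatal(err)
	}
	fmt.Println(string(data))
	// Expected to print something like:
	// {"id":1,"kind":"t1","match":[97,98,98],"text":"abb","eof":false,"invalid":false}
}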