author     Ryo Nihei <nihei.dev@gmail.com>  2021-04-06 00:01:34 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-04-06 00:01:34 +0900
commit     2b5e3239faf7dd14b18c6704da5d6cacc27844fe (patch)
tree       b81e7bcc3701c159cf4af9039f2987e88a2554f6
parent     Add logical inverse expression (diff)
download   tre-2b5e3239faf7dd14b18c6704da5d6cacc27844fe.tar.gz
           tre-2b5e3239faf7dd14b18c6704da5d6cacc27844fe.tar.xz
Print the result of the lex command in JSON format

* Print the result of the lex command in JSON format.
* Print the EOF token.
-rw-r--r--  cli/cmd/lex.go        11
-rw-r--r--  driver/lexer.go       86
-rw-r--r--  driver/lexer_test.go  228
3 files changed, 185 insertions, 140 deletions
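With this change, the lex command writes each token to stdout as one JSON object per line (including the trailing EOF token) instead of the previous plain `id: kind: "text"` lines. As a rough illustration only, with values taken from the t1/t2 spec used in the tests below and field names matching the struct tags added to Token, a run might print lines like:

    {"id":1,"kind":"t1","match":[97,98,98],"text":"abb","eof":false,"invalid":false}
    {"id":2,"kind":"t2","match":[32],"text":" ","eof":false,"invalid":false}
    {"id":0,"kind":"","match":[],"text":"","eof":true,"invalid":false}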
diff --git a/cli/cmd/lex.go b/cli/cmd/lex.go
index 7efc814..14fbc01 100644
--- a/cli/cmd/lex.go
+++ b/cli/cmd/lex.go
@@ -74,15 +74,14 @@ Date time: %v
if err != nil {
return err
}
+ data, err := json.Marshal(tok)
+ if err != nil {
+ fmt.Fprintf(os.Stderr, "failed to marshal a token; token: %v, error: %v\n", tok, err)
+ }
+ fmt.Fprintf(os.Stdout, "%v\n", string(data))
if tok.EOF {
break
}
- if tok.Invalid {
- fmt.Fprintf(os.Stdout, "-: -: ")
- } else {
- fmt.Fprintf(os.Stdout, "%v: %v: ", tok.ID, clspec.Kinds[tok.ID])
- }
- fmt.Fprintf(os.Stdout, "\"%v\"\n", string(tok.Match))
}
return nil
diff --git a/driver/lexer.go b/driver/lexer.go
index 750fec1..4a3c3cc 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -4,37 +4,73 @@ import (
"fmt"
"io"
"io/ioutil"
+ "strings"
"github.com/nihei9/maleeni/log"
"github.com/nihei9/maleeni/spec"
)
+type byteSequence []byte
+
+func newByteSequence(b []byte) byteSequence {
+ return byteSequence(b)
+}
+
+func (s byteSequence) ByteSlice() []byte {
+ return []byte(s)
+}
+
+func (s byteSequence) String() string {
+ if len(s) <= 0 {
+ return ""
+ }
+ var b strings.Builder
+ fmt.Fprintf(&b, "%X", s[0])
+ for _, d := range s[1:] {
+ fmt.Fprintf(&b, " %X", d)
+ }
+ return b.String()
+}
+
+func (s byteSequence) GoString() string {
+ return fmt.Sprintf("\"%v\"", s.String())
+}
+
+func (s byteSequence) MarshalJSON() ([]byte, error) {
+ if len(s) <= 0 {
+ return []byte("[]"), nil
+ }
+ var b strings.Builder
+ fmt.Fprintf(&b, "[%v", uint8(s[0]))
+ for _, e := range s[1:] {
+ fmt.Fprintf(&b, ", %v", uint8(e))
+ }
+ fmt.Fprintf(&b, "]")
+ return []byte(b.String()), nil
+}
+
+func (s byteSequence) merge(a byteSequence) byteSequence {
+ return append([]byte(s), []byte(a)...)
+}
+
type Token struct {
- ID int
- Kind string
- Match []byte
- EOF bool
- Invalid bool
+ ID int `json:"id"`
+ Kind string `json:"kind"`
+ Match byteSequence `json:"match"`
+ Text string `json:"text"`
+ EOF bool `json:"eof"`
+ Invalid bool `json:"invalid"`
}
-func newToken(id int, kind string, match []byte) *Token {
+func newToken(id int, kind string, match byteSequence) *Token {
return &Token{
ID: id,
Kind: kind,
Match: match,
+ Text: string(match.ByteSlice()),
}
}
-func (t *Token) String() string {
- if t.Invalid {
- return fmt.Sprintf("!{text: %v, byte: %v}", string(t.Match), t.Match)
- }
- if t.EOF {
- return "{eof}"
- }
- return fmt.Sprintf("{id: %v, kind: %v, text: %v, byte: %v}", t.ID, t.Kind, string(t.Match), t.Match)
-}
-
func newEOFToken() *Token {
return &Token{
ID: 0,
@@ -42,7 +78,7 @@ func newEOFToken() *Token {
}
}
-func newInvalidToken(match []byte) *Token {
+func newInvalidToken(match byteSequence) *Token {
return &Token{
ID: 0,
Match: match,
@@ -50,6 +86,16 @@ func newInvalidToken(match []byte) *Token {
}
}
+func (t *Token) String() string {
+ if t.Invalid {
+ return fmt.Sprintf("!{text: %v, byte: %v}", t.Text, t.Match)
+ }
+ if t.EOF {
+ return "{eof}"
+ }
+ return fmt.Sprintf("{id: %v, kind: %v, text: %v, byte: %v}", t.ID, t.Kind, t.Text, t.Match)
+}
+
type lexerOption func(l *lexer) error
func EnableLogging(w io.Writer) lexerOption {
@@ -131,7 +177,7 @@ func (l *lexer) Next() (*Token, error) {
if !tok.Invalid {
break
}
- errTok.Match = append(errTok.Match, tok.Match...)
+ errTok.Match = errTok.Match.merge(tok.Match)
l.logger.Log(" error token: %v", errTok)
}
l.tokBuf = append(l.tokBuf, tok)
@@ -194,12 +240,12 @@ func (l *lexer) next() (*Token, error) {
l.unread(unfixedBufLen)
return tok, nil
}
- return newInvalidToken(buf), nil
+ return newInvalidToken(newByteSequence(buf)), nil
}
state = nextState
id, ok := l.clspec.DFA.AcceptingStates[state]
if ok {
- tok = newToken(id, l.clspec.Kinds[id], buf)
+ tok = newToken(id, l.clspec.Kinds[id], newByteSequence(buf))
unfixedBufLen = 0
}
}
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 0c9f720..e48c12f 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -24,17 +24,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newToken(1, "t1", []byte("abb")),
- newToken(2, "t2", []byte(" ")),
- newToken(1, "t1", []byte("aabb")),
- newToken(2, "t2", []byte(" ")),
- newToken(1, "t1", []byte("aaabb")),
- newToken(2, "t2", []byte(" ")),
- newToken(1, "t1", []byte("babb")),
- newToken(2, "t2", []byte(" ")),
- newToken(1, "t1", []byte("bbabb")),
- newToken(2, "t2", []byte(" ")),
- newToken(1, "t1", []byte("abbbabb")),
+ newToken(1, "t1", newByteSequence([]byte("abb"))),
+ newToken(2, "t2", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("aabb"))),
+ newToken(2, "t2", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("aaabb"))),
+ newToken(2, "t2", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("babb"))),
+ newToken(2, "t2", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("bbabb"))),
+ newToken(2, "t2", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("abbbabb"))),
newEOFToken(),
},
},
@@ -48,21 +48,21 @@ func TestLexer_Next(t *testing.T) {
},
src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
tokens: []*Token{
- newToken(1, "t1", []byte("ba")),
- newToken(3, "t3", []byte(" ")),
- newToken(1, "t1", []byte("baaa")),
- newToken(3, "t3", []byte(" ")),
- newToken(1, "t1", []byte("a")),
- newToken(3, "t3", []byte(" ")),
- newToken(1, "t1", []byte("aaa")),
- newToken(3, "t3", []byte(" ")),
- newToken(2, "t2", []byte("abcd")),
- newToken(3, "t3", []byte(" ")),
- newToken(2, "t2", []byte("abcdcdcd")),
- newToken(3, "t3", []byte(" ")),
- newToken(2, "t2", []byte("cd")),
- newToken(3, "t3", []byte(" ")),
- newToken(2, "t2", []byte("cdcdcd")),
+ newToken(1, "t1", newByteSequence([]byte("ba"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("baaa"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("a"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(1, "t1", newByteSequence([]byte("aaa"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(2, "t2", newByteSequence([]byte("abcd"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(2, "t2", newByteSequence([]byte("abcdcdcd"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(2, "t2", newByteSequence([]byte("cd"))),
+ newToken(3, "t3", newByteSequence([]byte(" "))),
+ newToken(2, "t2", newByteSequence([]byte("cdcdcd"))),
newEOFToken(),
},
},
@@ -91,22 +91,22 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "t1", []byte{0x00}),
- newToken(1, "t1", []byte{0x7f}),
- newToken(1, "t1", []byte{0xc2, 0x80}),
- newToken(1, "t1", []byte{0xdf, 0xbf}),
- newToken(1, "t1", []byte{0xe1, 0x80, 0x80}),
- newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}),
- newToken(1, "t1", []byte{0xed, 0x80, 0x80}),
- newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}),
- newToken(1, "t1", []byte{0xee, 0x80, 0x80}),
- newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}),
- newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
- newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
- newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
- newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
- newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newToken(1, "t1", newByteSequence([]byte{0x00})),
+ newToken(1, "t1", newByteSequence([]byte{0x7f})),
+ newToken(1, "t1", newByteSequence([]byte{0xc2, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
+ newToken(1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+ newToken(1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+ newToken(1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newToken(1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newToken(1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+ newToken(1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+ newToken(1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
newEOFToken(),
},
},
@@ -118,17 +118,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "ab.*+?|()[]",
tokens: []*Token{
- newToken(1, "t1", []byte("a")),
- newToken(1, "t1", []byte("b")),
- newToken(1, "t1", []byte(".")),
- newToken(1, "t1", []byte("*")),
- newToken(1, "t1", []byte("+")),
- newToken(1, "t1", []byte("?")),
- newToken(1, "t1", []byte("|")),
- newToken(1, "t1", []byte("(")),
- newToken(1, "t1", []byte(")")),
- newToken(1, "t1", []byte("[")),
- newToken(1, "t1", []byte("]")),
+ newToken(1, "t1", newByteSequence([]byte("a"))),
+ newToken(1, "t1", newByteSequence([]byte("b"))),
+ newToken(1, "t1", newByteSequence([]byte("."))),
+ newToken(1, "t1", newByteSequence([]byte("*"))),
+ newToken(1, "t1", newByteSequence([]byte("+"))),
+ newToken(1, "t1", newByteSequence([]byte("?"))),
+ newToken(1, "t1", newByteSequence([]byte("|"))),
+ newToken(1, "t1", newByteSequence([]byte("("))),
+ newToken(1, "t1", newByteSequence([]byte(")"))),
+ newToken(1, "t1", newByteSequence([]byte("["))),
+ newToken(1, "t1", newByteSequence([]byte("]"))),
newEOFToken(),
},
},
@@ -151,10 +151,10 @@ func TestLexer_Next(t *testing.T) {
0x7f,
}),
tokens: []*Token{
- newToken(1, "1ByteChar", []byte{0x01}),
- newToken(1, "1ByteChar", []byte{0x02}),
- newToken(1, "1ByteChar", []byte{0x7e}),
- newToken(1, "1ByteChar", []byte{0x7f}),
+ newToken(1, "1ByteChar", newByteSequence([]byte{0x01})),
+ newToken(1, "1ByteChar", newByteSequence([]byte{0x02})),
+ newToken(1, "1ByteChar", newByteSequence([]byte{0x7e})),
+ newToken(1, "1ByteChar", newByteSequence([]byte{0x7f})),
newEOFToken(),
},
},
@@ -172,10 +172,10 @@ func TestLexer_Next(t *testing.T) {
0xdf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "2ByteChar", []byte{0xc2, 0x80}),
- newToken(1, "2ByteChar", []byte{0xc2, 0x81}),
- newToken(1, "2ByteChar", []byte{0xdf, 0xbe}),
- newToken(1, "2ByteChar", []byte{0xdf, 0xbf}),
+ newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
+ newToken(1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
+ newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
+ newToken(1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
newEOFToken(),
},
},
@@ -190,7 +190,7 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0x80,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
newEOFToken(),
},
},
@@ -208,10 +208,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0xbf,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
newEOFToken(),
},
},
@@ -229,10 +229,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
newEOFToken(),
},
},
@@ -262,22 +262,22 @@ func TestLexer_Next(t *testing.T) {
0xef, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
- newToken(1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
- newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
- newToken(1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
- newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
- newToken(1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
- newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
- newToken(1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
- newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
- newToken(1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
- newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
- newToken(1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
- newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
- newToken(1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
+ newToken(1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
newEOFToken(),
},
},
@@ -292,7 +292,7 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0x80,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
newEOFToken(),
},
},
@@ -310,10 +310,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
newEOFToken(),
},
},
@@ -331,10 +331,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
newEOFToken(),
},
},
@@ -352,10 +352,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0xbf, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
- newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
- newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
newEOFToken(),
},
},
@@ -381,18 +381,18 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
- newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
- newToken(1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
- newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
- newToken(1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
- newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
- newToken(1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
- newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
- newToken(1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
- newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
- newToken(1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
+ newToken(1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
newEOFToken(),
},
},
@@ -404,7 +404,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "foo9",
tokens: []*Token{
- newToken(1, "NonNumber", []byte("foo9")),
+ newToken(1, "NonNumber", newByteSequence([]byte("foo9"))),
newEOFToken(),
},
},
@@ -451,11 +451,11 @@ func TestLexer_PeekN(t *testing.T) {
expectedTokens := []*Token{
{
ID: 1,
- Match: []byte("foo"),
+ Match: newByteSequence([]byte("foo")),
},
{
ID: 2,
- Match: []byte("bar"),
+ Match: newByteSequence([]byte("bar")),
},
{
EOF: true,