author     Ryo Nihei <nihei.dev@gmail.com>  2021-09-11 00:40:05 +0900
committer  Ryo Nihei <nihei.dev@gmail.com>  2021-09-11 22:57:17 +0900
commit     96a555a00f000704c618c226485fa6d87ce66d9d
tree       9d7398033a2c015390f0de7ab69b6fd37bb1ba30 /driver
parent     Remove --debug option from the lex command
Define a lexical specification interface
Diffstat (limited to 'driver')
-rw-r--r--  driver/lexer.go       192
-rw-r--r--  driver/lexer_test.go  429
-rw-r--r--  driver/spec.go         73
3 files changed, 356 insertions(+), 338 deletions(-)
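
The commit decouples the driver from the spec package: the Lexer now runs against the new LexSpec interface, the lookup logic specific to spec.CompiledLexSpec moves into driver/spec.go behind NewLexSpec, and Token exposes its matched bytes through the exported Lexeme field instead of the old unexported byteSequence. As a minimal usage sketch of the new wiring (assuming the import path github.com/nihei9/maleeni/driver; the compile step that yields a *spec.CompiledLexSpec is unchanged):

    package example

    import (
        "fmt"
        "strings"

        "github.com/nihei9/maleeni/driver"
        "github.com/nihei9/maleeni/spec"
    )

    func printTokens(clspec *spec.CompiledLexSpec, src string) error {
        // NewLexer now takes the LexSpec interface; NewLexSpec adapts
        // a compiled spec to it.
        lexer, err := driver.NewLexer(driver.NewLexSpec(clspec), strings.NewReader(src))
        if err != nil {
            return err
        }
        for {
            tok, err := lexer.Next()
            if err != nil {
                return err
            }
            if tok.EOF {
                return nil
            }
            // Matched bytes are read straight from the exported Lexeme
            // field; the Match()/Text() accessors are removed.
            fmt.Printf("%v: %q\n", tok.KindName, tok.Lexeme)
        }
    }
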
diff --git a/driver/lexer.go b/driver/lexer.go
index bce8d8c..d8230d2 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -5,71 +5,60 @@ import (
"fmt"
"io"
"io/ioutil"
- "strings"
-
- "github.com/nihei9/maleeni/spec"
)
-type byteSequence []byte
+type ModeID int
-func newByteSequence(b []byte) byteSequence {
- return byteSequence(b)
+func (id ModeID) Int() int {
+ return int(id)
}
-func (s byteSequence) ByteSlice() []byte {
- return []byte(s)
-}
+type StateID int
-func (s byteSequence) String() string {
- if len(s) <= 0 {
- return ""
- }
- var b strings.Builder
- fmt.Fprintf(&b, "%X", s[0])
- for _, d := range s[1:] {
- fmt.Fprintf(&b, " %X", d)
- }
- return b.String()
+func (id StateID) Int() int {
+ return int(id)
}
-func (s byteSequence) GoString() string {
- return fmt.Sprintf("\"%v\"", s.String())
+type KindID int
+
+func (id KindID) Int() int {
+ return int(id)
}
-func (s byteSequence) MarshalJSON() ([]byte, error) {
- if len(s) <= 0 {
- return []byte("[]"), nil
- }
- var b strings.Builder
- fmt.Fprintf(&b, "[%v", uint8(s[0]))
- for _, e := range s[1:] {
- fmt.Fprintf(&b, ", %v", uint8(e))
- }
- fmt.Fprintf(&b, "]")
- return []byte(b.String()), nil
+type ModeKindID int
+
+func (id ModeKindID) Int() int {
+ return int(id)
}
-func (s byteSequence) merge(a byteSequence) byteSequence {
- return append([]byte(s), []byte(a)...)
+type LexSpec interface {
+ InitialMode() ModeID
+ Pop(mode ModeID, modeKind ModeKindID) bool
+ Push(mode ModeID, modeKind ModeKindID) (ModeID, bool)
+ ModeName(mode ModeID) string
+ InitialState(mode ModeID) StateID
+ NextState(mode ModeID, state StateID, v int) (StateID, bool)
+ Accept(mode ModeID, state StateID) (ModeKindID, bool)
+ KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string)
}
// Token represents a token.
type Token struct {
// ModeID is an ID of a lex mode.
- ModeID spec.LexModeID
+ ModeID ModeID
// ModeName is a name of a lex mode.
- ModeName spec.LexModeName
+ ModeName string
// KindID is an ID of a kind. This is unique among all modes.
- KindID spec.LexKindID
+ KindID KindID
// ModeKindID is an ID of a lexical kind. This is unique only within a mode.
// Note that you need to use the KindID field if you want to identify a kind across all modes.
- ModeKindID spec.LexModeKindID
+ ModeKindID ModeKindID
// KindName is a name of a lexical kind.
- KindName spec.LexKindName
+ KindName string
// Row is a row number where a lexeme appears.
Row int
@@ -78,59 +67,37 @@ type Token struct {
// Note that Col is counted in code points, not bytes.
Col int
+ // Lexeme is a byte sequence that matches a pattern in a lexical specification.
+ Lexeme []byte
+
// When this field is true, it means the token is the EOF token.
EOF bool
// When this field is true, it means the token is an error token.
Invalid bool
-
- // match is a byte sequence matched a pattern of a lexical specification.
- match byteSequence
-}
-
-func (t *Token) String() string {
- if t.Invalid {
- return fmt.Sprintf("!{mode id: %v, mode name: %v, row: %v, col: %v, text: %v, byte: %v}", t.ModeID, t.ModeName, t.Row, t.Col, t.Text(), t.Match())
- }
- if t.EOF {
- return fmt.Sprintf("{kind name: eof, row: %v, col: %v}", t.Row, t.Col)
- }
- return fmt.Sprintf("{mode id: %v, mode name: %v, kind id: %v, mode kind id: %v, kind name: %v, row: %v, col: %v, text: %v, byte: %v}", t.ModeID, t.ModeName, t.KindID, t.ModeKindID, t.KindName, t.Row, t.Col, t.Text(), t.Match())
-}
-
-// Match returns a byte slice matched a pattern of a lexical specification.
-func (t *Token) Match() []byte {
- return t.match.ByteSlice()
-}
-
-// Text returns a string representation of a matched byte sequence.
-func (t *Token) Text() string {
- return string(t.Match())
}
func (t *Token) MarshalJSON() ([]byte, error) {
return json.Marshal(struct {
- ModeID int `json:"mode_id"`
- ModeName string `json:"mode_name"`
- KindID int `json:"kind_id"`
- ModeKindID int `json:"mode_kind_id"`
- KindName string `json:"kind_name"`
- Row int `json:"row"`
- Col int `json:"col"`
- Match byteSequence `json:"match"`
- Text string `json:"text"`
- EOF bool `json:"eof"`
- Invalid bool `json:"invalid"`
+ ModeID int `json:"mode_id"`
+ ModeName string `json:"mode_name"`
+ KindID int `json:"kind_id"`
+ ModeKindID int `json:"mode_kind_id"`
+ KindName string `json:"kind_name"`
+ Row int `json:"row"`
+ Col int `json:"col"`
+ Lexeme string `json:"lexeme"`
+ EOF bool `json:"eof"`
+ Invalid bool `json:"invalid"`
}{
ModeID: t.ModeID.Int(),
- ModeName: t.ModeName.String(),
+ ModeName: t.ModeName,
KindID: t.KindID.Int(),
ModeKindID: t.ModeKindID.Int(),
- KindName: t.KindName.String(),
+ KindName: t.KindName,
Row: t.Row,
Col: t.Col,
- Match: t.match,
- Text: t.Text(),
+ Lexeme: string(t.Lexeme),
EOF: t.EOF,
Invalid: t.Invalid,
})
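
With the unexported match field gone, MarshalJSON above emits the matched bytes exactly once, as the UTF-8 string "lexeme", replacing the old pair of a "match" byte array and a derived "text" string. For a hypothetical token of kind t1 matching "abb" in the default mode (all field values illustrative), the output looks like:

    {"mode_id":1,"mode_name":"default","kind_id":1,"mode_kind_id":1,"kind_name":"t1","row":0,"col":0,"lexeme":"abb","eof":false,"invalid":false}
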
@@ -146,7 +113,7 @@ func DisableModeTransition() LexerOption {
}
type Lexer struct {
- clspec *spec.CompiledLexSpec
+ spec LexSpec
src []byte
srcPtr int
row int
@@ -154,23 +121,23 @@ type Lexer struct {
prevRow int
prevCol int
tokBuf []*Token
- modeStack []spec.LexModeID
+ modeStack []ModeID
passiveModeTran bool
}
-func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) {
+func NewLexer(spec LexSpec, src io.Reader, opts ...LexerOption) (*Lexer, error) {
b, err := ioutil.ReadAll(src)
if err != nil {
return nil, err
}
l := &Lexer{
- clspec: clspec,
+ spec: spec,
src: b,
srcPtr: 0,
row: 0,
col: 0,
- modeStack: []spec.LexModeID{
- clspec.InitialModeID,
+ modeStack: []ModeID{
+ spec.InitialMode(),
},
passiveModeTran: false,
}
@@ -207,7 +174,7 @@ func (l *Lexer) Next() (*Token, error) {
if !tok.Invalid {
break
}
- errTok.match = errTok.match.merge(tok.match)
+ errTok.Lexeme = append(errTok.Lexeme, tok.Lexeme...)
}
l.tokBuf = append(l.tokBuf, tok)
@@ -225,15 +192,14 @@ func (l *Lexer) nextAndTransition() (*Token, error) {
if l.passiveModeTran {
return tok, nil
}
- spec := l.clspec.Specs[l.Mode()]
- if spec.Pop[tok.ModeKindID] == 1 {
+ mode := l.Mode()
+ if l.spec.Pop(mode, tok.ModeKindID) {
err := l.PopMode()
if err != nil {
return nil, err
}
}
- mode := spec.Push[tok.ModeKindID]
- if !mode.IsNil() {
+ if mode, ok := l.spec.Push(mode, tok.ModeKindID); ok {
l.PushMode(mode)
}
// The length of the mode stack must be checked after the pop and push operations
@@ -249,9 +215,8 @@ func (l *Lexer) nextAndTransition() (*Token, error) {
func (l *Lexer) next() (*Token, error) {
mode := l.Mode()
- modeName := l.clspec.ModeNames[mode]
- spec := l.clspec.Specs[mode]
- state := spec.DFA.InitialStateID
+ modeName := l.spec.ModeName(mode)
+ state := l.spec.InitialState(mode)
buf := []byte{}
unfixedBufLen := 0
row := l.row
@@ -271,9 +236,9 @@ func (l *Lexer) next() (*Token, error) {
ModeID: mode,
ModeName: modeName,
ModeKindID: 0,
+ Lexeme: buf,
Row: row,
Col: col,
- match: newByteSequence(buf),
Invalid: true,
}, nil
}
@@ -288,7 +253,7 @@ func (l *Lexer) next() (*Token, error) {
}
buf = append(buf, v)
unfixedBufLen++
- nextState, ok := l.lookupNextState(mode, state, int(v))
+ nextState, ok := l.spec.NextState(mode, state, int(v))
if !ok {
if tok != nil {
l.unread(unfixedBufLen)
@@ -298,62 +263,35 @@ func (l *Lexer) next() (*Token, error) {
ModeID: mode,
ModeName: modeName,
ModeKindID: 0,
+ Lexeme: buf,
Row: row,
Col: col,
- match: newByteSequence(buf),
Invalid: true,
}, nil
}
state = nextState
- modeKindID := spec.DFA.AcceptingStates[state]
- if modeKindID != 0 {
- kindID := l.clspec.KindIDs[mode][modeKindID]
+ if modeKindID, ok := l.spec.Accept(mode, state); ok {
+ kindID, kindName := l.spec.KindIDAndName(mode, modeKindID)
tok = &Token{
ModeID: mode,
ModeName: modeName,
KindID: kindID,
ModeKindID: modeKindID,
- KindName: spec.KindNames[modeKindID],
+ KindName: kindName,
+ Lexeme: buf,
Row: row,
Col: col,
- match: newByteSequence(buf),
}
unfixedBufLen = 0
}
}
}
-func (l *Lexer) lookupNextState(mode spec.LexModeID, state spec.StateID, v int) (spec.StateID, bool) {
- switch l.clspec.CompressionLevel {
- case 2:
- tab := l.clspec.Specs[mode].DFA.Transition
- rowNum := tab.RowNums[state]
- d := tab.UniqueEntries.RowDisplacement[rowNum]
- if tab.UniqueEntries.Bounds[d+v] != rowNum {
- return tab.UniqueEntries.EmptyValue, false
- }
- return tab.UniqueEntries.Entries[d+v], true
- case 1:
- tab := l.clspec.Specs[mode].DFA.Transition
- next := tab.UncompressedUniqueEntries[tab.RowNums[state]*tab.OriginalColCount+v]
- if next == spec.StateIDNil {
- return spec.StateIDNil, false
- }
- return next, true
- }
- modeSpec := l.clspec.Specs[mode]
- next := modeSpec.DFA.UncompressedTransition[state.Int()*modeSpec.DFA.ColCount+v]
- if next == spec.StateIDNil {
- return spec.StateIDNil, false
- }
- return next, true
-}
-
-func (l *Lexer) Mode() spec.LexModeID {
+func (l *Lexer) Mode() ModeID {
return l.modeStack[len(l.modeStack)-1]
}
-func (l *Lexer) PushMode(mode spec.LexModeID) {
+func (l *Lexer) PushMode(mode ModeID) {
l.modeStack = append(l.modeStack, mode)
}
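
The LexSpec interface is small enough to implement by hand, which is the point of the refactoring: the Lexer no longer cares where its tables come from. A toy implementation (hypothetical, purely for illustration; real specs are produced by the maleeni compiler and adapted via NewLexSpec) that recognizes runs of the byte 'a' in a single default mode:

    package example

    import "github.com/nihei9/maleeni/driver"

    // aPlusSpec models a two-state DFA: state 1 is the initial state,
    // and state 2 (the only accepting state) is reached after one or
    // more 'a' bytes.
    type aPlusSpec struct{}

    func (aPlusSpec) InitialMode() driver.ModeID { return 1 }

    // A single mode that is never popped or pushed.
    func (aPlusSpec) Pop(driver.ModeID, driver.ModeKindID) bool { return false }
    func (aPlusSpec) Push(driver.ModeID, driver.ModeKindID) (driver.ModeID, bool) {
        return 0, false
    }

    func (aPlusSpec) ModeName(driver.ModeID) string             { return "default" }
    func (aPlusSpec) InitialState(driver.ModeID) driver.StateID { return 1 }

    // NextState receives each input byte as v; any byte other than
    // 'a' is a dead end.
    func (aPlusSpec) NextState(_ driver.ModeID, _ driver.StateID, v int) (driver.StateID, bool) {
        if v == 'a' {
            return 2, true
        }
        return 0, false
    }

    func (aPlusSpec) Accept(_ driver.ModeID, state driver.StateID) (driver.ModeKindID, bool) {
        return 1, state == 2
    }

    func (aPlusSpec) KindIDAndName(driver.ModeID, driver.ModeKindID) (driver.KindID, string) {
        return 1, "a_plus"
    }

Feeding driver.NewLexer(aPlusSpec{}, strings.NewReader("aaab")) through Next() should yield one a_plus token for "aaa" followed by an invalid token for "b", since the driver applies maximal munch and reports unmatched input as invalid tokens.
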
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 33b206f..ebb4aad 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -42,22 +42,29 @@ func newLexEntryFragment(kind string, pattern string) *spec.LexEntry {
}
}
-func newToken(modeID spec.LexModeID, modeName spec.LexModeName, kindID spec.LexKindID, modeKindID spec.LexModeKindID, kindName spec.LexKindName, match byteSequence) *Token {
+func newToken(modeID ModeID, modeName string, kindID KindID, modeKindID ModeKindID, kindName string, lexeme []byte) *Token {
return &Token{
ModeID: modeID,
ModeName: modeName,
KindID: kindID,
ModeKindID: modeKindID,
KindName: kindName,
- match: match,
+ Lexeme: lexeme,
}
}
-func newTokenDefault(kindID int, modeKindID int, kindName string, match byteSequence) *Token {
- return newToken(spec.LexModeIDDefault, spec.LexModeNameDefault, spec.LexKindID(kindID), spec.LexModeKindID(modeKindID), spec.LexKindName(kindName), match)
+func newTokenDefault(kindID int, modeKindID int, kindName string, lexeme []byte) *Token {
+ return newToken(
+ ModeID(spec.LexModeIDDefault.Int()),
+ spec.LexModeNameDefault.String(),
+ KindID(spec.LexKindID(kindID).Int()),
+ ModeKindID(spec.LexModeKindID(modeKindID).Int()),
+ spec.LexKindName(kindName).String(),
+ lexeme,
+ )
}
-func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
+func newEOFToken(modeID ModeID, modeName string) *Token {
return &Token{
ModeID: modeID,
ModeName: modeName,
@@ -67,15 +74,15 @@ func newEOFToken(modeID spec.LexModeID, modeName spec.LexModeName) *Token {
}
func newEOFTokenDefault() *Token {
- return newEOFToken(spec.LexModeIDDefault, spec.LexModeNameDefault)
+ return newEOFToken(ModeID(spec.LexModeIDDefault.Int()), spec.LexModeNameDefault.String())
}
-func newInvalidToken(modeID spec.LexModeID, modeName spec.LexModeName, match byteSequence) *Token {
+func newInvalidToken(modeID ModeID, modeName string, lexeme []byte) *Token {
return &Token{
ModeID: modeID,
ModeName: modeName,
ModeKindID: 0,
- match: match,
+ Lexeme: lexeme,
Invalid: true,
}
}
@@ -103,17 +110,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("aabb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaabb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("babb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("bbabb"))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abbbabb"))),
+ newTokenDefault(1, 1, "t1", []byte("abb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("aabb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("aaabb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("babb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("bbabb")),
+ newTokenDefault(2, 2, "t2", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("abbbabb")),
newEOFTokenDefault(),
},
},
@@ -127,21 +134,21 @@ func TestLexer_Next(t *testing.T) {
},
src: "ba baaa a aaa abcd abcdcdcd cd cdcdcd",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("ba"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("baaa"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("aaa"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcd"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("abcdcdcd"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("cd"))),
- newTokenDefault(3, 3, "t3", newByteSequence([]byte(" "))),
- newTokenDefault(2, 2, "t2", newByteSequence([]byte("cdcdcd"))),
+ newTokenDefault(1, 1, "t1", []byte("ba")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("baaa")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("a")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(1, 1, "t1", []byte("aaa")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("abcd")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("abcdcdcd")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("cd")),
+ newTokenDefault(3, 3, "t3", []byte(" ")),
+ newTokenDefault(2, 2, "t2", []byte("cdcdcd")),
newEOFTokenDefault(),
},
},
@@ -170,22 +177,22 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x00})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0x7f})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xc2, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xdf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "t1", []byte{0x00}),
+ newTokenDefault(1, 1, "t1", []byte{0x7f}),
+ newTokenDefault(1, 1, "t1", []byte{0xc2, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xdf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xe1, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xec, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xed, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xed, 0x9f, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xee, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xef, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -197,17 +204,17 @@ func TestLexer_Next(t *testing.T) {
},
src: "ab.*+?|()[]",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("a"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("b"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("."))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("*"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("+"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("?"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("|"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("("))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte(")"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("["))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("]"))),
+ newTokenDefault(1, 1, "t1", []byte("a")),
+ newTokenDefault(1, 1, "t1", []byte("b")),
+ newTokenDefault(1, 1, "t1", []byte(".")),
+ newTokenDefault(1, 1, "t1", []byte("*")),
+ newTokenDefault(1, 1, "t1", []byte("+")),
+ newTokenDefault(1, 1, "t1", []byte("?")),
+ newTokenDefault(1, 1, "t1", []byte("|")),
+ newTokenDefault(1, 1, "t1", []byte("(")),
+ newTokenDefault(1, 1, "t1", []byte(")")),
+ newTokenDefault(1, 1, "t1", []byte("[")),
+ newTokenDefault(1, 1, "t1", []byte("]")),
newEOFTokenDefault(),
},
},
@@ -230,10 +237,10 @@ func TestLexer_Next(t *testing.T) {
0x7f,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x01})),
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x02})),
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7e})),
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x7f})),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x01}),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x02}),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x7e}),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x7f}),
newEOFTokenDefault(),
},
},
@@ -251,10 +258,10 @@ func TestLexer_Next(t *testing.T) {
0xdf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x80})),
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xc2, 0x81})),
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbe})),
- newTokenDefault(1, 1, "2ByteChar", newByteSequence([]byte{0xdf, 0xbf})),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x80}),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xc2, 0x81}),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbe}),
+ newTokenDefault(1, 1, "2ByteChar", []byte{0xdf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -269,7 +276,7 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
newEOFTokenDefault(),
},
},
@@ -287,10 +294,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xa0, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -308,10 +315,10 @@ func TestLexer_Next(t *testing.T) {
0xe0, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -341,22 +348,22 @@ func TestLexer_Next(t *testing.T) {
0xef, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xa0, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe0, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xe1, 0x80, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xec, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x80, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xed, 0x9f, 0xbf})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x80})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xee, 0x80, 0x81})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "3ByteChar", newByteSequence([]byte{0xef, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xa0, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe0, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xe1, 0x80, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xec, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x80, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xed, 0x9f, 0xbf}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x80}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xee, 0x80, 0x81}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "3ByteChar", []byte{0xef, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -371,7 +378,7 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0x80,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
newEOFTokenDefault(),
},
},
@@ -389,10 +396,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0x80, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -410,10 +417,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0x90, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -431,10 +438,10 @@ func TestLexer_Next(t *testing.T) {
0xf0, 0xbf, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -460,18 +467,18 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0x90, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf0, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf1, 0x80, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf3, 0xbf, 0xbf, 0xbf})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x80})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x80, 0x80, 0x81})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbe})),
- newTokenDefault(1, 1, "4ByteChar", newByteSequence([]byte{0xf4, 0x8f, 0xbf, 0xbf})),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0x90, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf1, 0x80, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x80, 0x80, 0x81}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbe}),
+ newTokenDefault(1, 1, "4ByteChar", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFTokenDefault(),
},
},
@@ -483,7 +490,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "foo9",
tokens: []*Token{
- newTokenDefault(1, 1, "NonNumber", newByteSequence([]byte("foo9"))),
+ newTokenDefault(1, 1, "NonNumber", []byte("foo9")),
newEOFTokenDefault(),
},
},
@@ -498,10 +505,10 @@ func TestLexer_Next(t *testing.T) {
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, 1, "1ByteChar", newByteSequence([]byte{0x6E})),
- newTokenDefault(2, 2, "2ByteChar", newByteSequence([]byte{0xCE, 0xBD})),
- newTokenDefault(3, 3, "3ByteChar", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newTokenDefault(4, 4, "4ByteChar", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newTokenDefault(1, 1, "1ByteChar", []byte{0x6E}),
+ newTokenDefault(2, 2, "2ByteChar", []byte{0xCE, 0xBD}),
+ newTokenDefault(3, 3, "3ByteChar", []byte{0xE3, 0x81, 0xAB}),
+ newTokenDefault(4, 4, "4ByteChar", []byte{0xF0, 0x9F, 0x98, 0xB8}),
newEOFTokenDefault(),
},
},
@@ -513,10 +520,10 @@ func TestLexer_Next(t *testing.T) {
},
src: "nνに😸",
tokens: []*Token{
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0x6E})),
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xCE, 0xBD})),
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xE3, 0x81, 0xAB})),
- newTokenDefault(1, 1, "codePointsAlt", newByteSequence([]byte{0xF0, 0x9F, 0x98, 0xB8})),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0x6E}),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0xCE, 0xBD}),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0xE3, 0x81, 0xAB}),
+ newTokenDefault(1, 1, "codePointsAlt", []byte{0xF0, 0x9F, 0x98, 0xB8}),
newEOFTokenDefault(),
},
},
@@ -530,8 +537,8 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabcdef",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdef"))),
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdef"))),
+ newTokenDefault(1, 1, "t1", []byte("abcdefdef")),
+ newTokenDefault(1, 1, "t1", []byte("abcdef")),
newEOFTokenDefault(),
},
},
@@ -545,7 +552,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabc",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+ newTokenDefault(1, 1, "t1", []byte("abcdefdefabc")),
newEOFTokenDefault(),
},
},
@@ -560,7 +567,7 @@ func TestLexer_Next(t *testing.T) {
},
src: "abcdefdefabc",
tokens: []*Token{
- newTokenDefault(1, 1, "t1", newByteSequence([]byte("abcdefdefabc"))),
+ newTokenDefault(1, 1, "t1", []byte("abcdefdefabc")),
newEOFTokenDefault(),
},
},
@@ -576,16 +583,16 @@ func TestLexer_Next(t *testing.T) {
},
src: `"" "Hello world.\n\"Hello world.\""`,
tokens: []*Token{
- newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
- newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "string_open", newByteSequence([]byte(`"`))),
- newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
- newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\n`))),
- newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
- newToken(2, "string", 4, 2, "char_sequence", newByteSequence([]byte(`Hello world.`))),
- newToken(2, "string", 3, 1, "escape_sequence", newByteSequence([]byte(`\"`))),
- newToken(2, "string", 5, 3, "string_close", newByteSequence([]byte(`"`))),
+ newToken(1, "default", 2, 2, "string_open", []byte(`"`)),
+ newToken(2, "string", 5, 3, "string_close", []byte(`"`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "string_open", []byte(`"`)),
+ newToken(2, "string", 4, 2, "char_sequence", []byte(`Hello world.`)),
+ newToken(2, "string", 3, 1, "escape_sequence", []byte(`\n`)),
+ newToken(2, "string", 3, 1, "escape_sequence", []byte(`\"`)),
+ newToken(2, "string", 4, 2, "char_sequence", []byte(`Hello world.`)),
+ newToken(2, "string", 3, 1, "escape_sequence", []byte(`\"`)),
+ newToken(2, "string", 5, 3, "string_close", []byte(`"`)),
newEOFTokenDefault(),
},
},
@@ -602,15 +609,15 @@ func TestLexer_Next(t *testing.T) {
},
src: ` a b < < `,
tokens: []*Token{
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "char_a", newByteSequence([]byte(`a`))),
- newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "state_a", 3, 2, "char_b", newByteSequence([]byte(`b`))),
- newToken(3, "state_b", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "state_b", 5, 2, "back_from_b", newByteSequence([]byte(`<`))),
- newToken(2, "state_a", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "state_a", 4, 3, "back_from_a", newByteSequence([]byte(`<`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "char_a", []byte(`a`)),
+ newToken(2, "state_a", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "state_a", 3, 2, "char_b", []byte(`b`)),
+ newToken(3, "state_b", 1, 1, "white_space", []byte(` `)),
+ newToken(3, "state_b", 5, 2, "back_from_b", []byte(`<`)),
+ newToken(2, "state_a", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "state_a", 4, 3, "back_from_a", []byte(`<`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
newEOFTokenDefault(),
},
},
@@ -627,20 +634,20 @@ func TestLexer_Next(t *testing.T) {
},
src: `-> 1 -> 2 <- <- a`,
tokens: []*Token{
- newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
- newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
+ newToken(1, "default", 3, 3, "push_1", []byte(`-> 1`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 4, 2, "push_2", []byte(`-> 2`)),
+ newToken(3, "mode_2", 1, 1, "white_space", []byte(` `)),
+ newToken(3, "mode_2", 6, 2, "pop_2", []byte(`<-`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 5, 3, "pop_1", []byte(`<-`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "char", []byte(`a`)),
newEOFTokenDefault(),
},
passiveModeTran: true,
tran: func(l *Lexer, tok *Token) error {
- switch l.clspec.ModeNames[l.Mode()] {
+ switch l.spec.ModeName(l.Mode()) {
case "default":
switch tok.KindName {
case "push_1":
@@ -675,21 +682,21 @@ func TestLexer_Next(t *testing.T) {
},
src: `-> 1 -> 2 <- <- a`,
tokens: []*Token{
- newToken(1, "default", 3, 3, "push_1", newByteSequence([]byte(`-> 1`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 4, 2, "push_2", newByteSequence([]byte(`-> 2`))),
- newToken(3, "mode_2", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(3, "mode_2", 6, 2, "pop_2", newByteSequence([]byte(`<-`))),
- newToken(2, "mode_1", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(2, "mode_1", 5, 3, "pop_1", newByteSequence([]byte(`<-`))),
- newToken(1, "default", 1, 1, "white_space", newByteSequence([]byte(` `))),
- newToken(1, "default", 2, 2, "char", newByteSequence([]byte(`a`))),
+ newToken(1, "default", 3, 3, "push_1", []byte(`-> 1`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 4, 2, "push_2", []byte(`-> 2`)),
+ newToken(3, "mode_2", 1, 1, "white_space", []byte(` `)),
+ newToken(3, "mode_2", 6, 2, "pop_2", []byte(`<-`)),
+ newToken(2, "mode_1", 1, 1, "white_space", []byte(` `)),
+ newToken(2, "mode_1", 5, 3, "pop_1", []byte(`<-`)),
+ newToken(1, "default", 1, 1, "white_space", []byte(` `)),
+ newToken(1, "default", 2, 2, "char", []byte(`a`)),
newEOFTokenDefault(),
},
// Active mode transition and an external transition function can be used together.
passiveModeTran: false,
tran: func(l *Lexer, tok *Token) error {
- switch l.clspec.ModeNames[l.Mode()] {
+ switch l.spec.ModeName(l.Mode()) {
case "mode_1":
switch tok.KindName {
case "push_2":
@@ -717,15 +724,15 @@ func TestLexer_Next(t *testing.T) {
},
src: `.*+?|()[\`,
tokens: []*Token{
- newTokenDefault(1, 1, "dot", newByteSequence([]byte(`.`))),
- newTokenDefault(2, 2, "star", newByteSequence([]byte(`*`))),
- newTokenDefault(3, 3, "plus", newByteSequence([]byte(`+`))),
- newTokenDefault(4, 4, "question", newByteSequence([]byte(`?`))),
- newTokenDefault(5, 5, "vbar", newByteSequence([]byte(`|`))),
- newTokenDefault(6, 6, "lparen", newByteSequence([]byte(`(`))),
- newTokenDefault(7, 7, "rparen", newByteSequence([]byte(`)`))),
- newTokenDefault(8, 8, "lbrace", newByteSequence([]byte(`[`))),
- newTokenDefault(9, 9, "backslash", newByteSequence([]byte(`\`))),
+ newTokenDefault(1, 1, "dot", []byte(`.`)),
+ newTokenDefault(2, 2, "star", []byte(`*`)),
+ newTokenDefault(3, 3, "plus", []byte(`+`)),
+ newTokenDefault(4, 4, "question", []byte(`?`)),
+ newTokenDefault(5, 5, "vbar", []byte(`|`)),
+ newTokenDefault(6, 6, "lparen", []byte(`(`)),
+ newTokenDefault(7, 7, "rparen", []byte(`)`)),
+ newTokenDefault(8, 8, "lbrace", []byte(`[`)),
+ newTokenDefault(9, 9, "backslash", []byte(`\`)),
newEOFTokenDefault(),
},
},
@@ -741,7 +748,7 @@ func TestLexer_Next(t *testing.T) {
if tt.passiveModeTran {
opts = append(opts, DisableModeTransition())
}
- lexer, err := NewLexer(clspec, strings.NewReader(tt.src), opts...)
+ lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(tt.src), opts...)
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -752,7 +759,7 @@ func TestLexer_Next(t *testing.T) {
break
}
testToken(t, eTok, tok, false)
- // t.Logf("token: ID: %v, Match: %+v Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match(), tok.Text(), tok.EOF, tok.Invalid)
+
if tok.EOF {
break
}
@@ -813,39 +820,39 @@ func TestLexer_Next_WithPosition(t *testing.T) {
})
expected := []*Token{
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x00})), 0, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0x7F})), 0, 1),
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 0, 2),
-
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xC2, 0x80})), 1, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xDF, 0xBF})), 1, 1),
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 1, 2),
-
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xA0, 0x80})), 2, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE0, 0xBF, 0xBF})), 2, 1),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xE1, 0x80, 0x80})), 2, 2),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEC, 0xBF, 0xBF})), 2, 3),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x80, 0x80})), 2, 4),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xED, 0x9F, 0xBF})), 2, 5),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEE, 0x80, 0x80})), 2, 6),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xEF, 0xBF, 0xBF})), 2, 7),
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A})), 2, 8),
-
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0x90, 0x80, 0x80})), 3, 0),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF0, 0xBF, 0xBF, 0xBF})), 3, 1),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF1, 0x80, 0x80, 0x80})), 3, 2),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF3, 0xBF, 0xBF, 0xBF})), 3, 3),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x80, 0x80, 0x80})), 3, 4),
- withPos(newTokenDefault(2, 2, "any", newByteSequence([]byte{0xF4, 0x8F, 0xBF, 0xBF})), 3, 5),
+ withPos(newTokenDefault(2, 2, "any", []byte{0x00}), 0, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0x7F}), 0, 1),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 0, 2),
+
+ withPos(newTokenDefault(2, 2, "any", []byte{0xC2, 0x80}), 1, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xDF, 0xBF}), 1, 1),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 1, 2),
+
+ withPos(newTokenDefault(2, 2, "any", []byte{0xE0, 0xA0, 0x80}), 2, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xE0, 0xBF, 0xBF}), 2, 1),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xE1, 0x80, 0x80}), 2, 2),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xEC, 0xBF, 0xBF}), 2, 3),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xED, 0x80, 0x80}), 2, 4),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xED, 0x9F, 0xBF}), 2, 5),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xEE, 0x80, 0x80}), 2, 6),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xEF, 0xBF, 0xBF}), 2, 7),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A}), 2, 8),
+
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF0, 0x90, 0x80, 0x80}), 3, 0),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF0, 0xBF, 0xBF, 0xBF}), 3, 1),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF1, 0x80, 0x80, 0x80}), 3, 2),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF3, 0xBF, 0xBF, 0xBF}), 3, 3),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF4, 0x80, 0x80, 0x80}), 3, 4),
+ withPos(newTokenDefault(2, 2, "any", []byte{0xF4, 0x8F, 0xBF, 0xBF}), 3, 5),
// When a token contains multiple line breaks, the driver sets the token position to
// the line number where a lexeme first appears.
- withPos(newTokenDefault(1, 1, "newline", newByteSequence([]byte{0x0A, 0x0A, 0x0A})), 3, 6),
+ withPos(newTokenDefault(1, 1, "newline", []byte{0x0A, 0x0A, 0x0A}), 3, 6),
withPos(newEOFTokenDefault(), 0, 0),
}
- lexer, err := NewLexer(clspec, strings.NewReader(src))
+ lexer, err := NewLexer(NewLexSpec(clspec), strings.NewReader(src))
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -872,15 +879,15 @@ func testToken(t *testing.T, expected, actual *Token, checkPosition bool) {
actual.KindID != expected.KindID ||
actual.ModeKindID != expected.ModeKindID ||
actual.KindName != expected.KindName ||
- !bytes.Equal(actual.Match(), expected.Match()) ||
+ !bytes.Equal(actual.Lexeme, expected.Lexeme) ||
actual.EOF != expected.EOF ||
actual.Invalid != expected.Invalid {
- t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+ t.Fatalf(`unexpected token; want: %v ("%#v"), got: %v ("%#v")`, expected, string(expected.Lexeme), actual, string(actual.Lexeme))
}
if checkPosition {
if actual.Row != expected.Row || actual.Col != expected.Col {
- t.Fatalf(`unexpected token; want: %v ("%v"), got: %v ("%v")`, expected, expected.Text(), actual, actual.Text())
+ t.Fatalf(`unexpected token; want: %v ("%#v"), got: %v ("%#v")`, expected, string(expected.Lexeme), actual, string(actual.Lexeme))
}
}
}
diff --git a/driver/spec.go b/driver/spec.go
new file mode 100644
index 0000000..149d922
--- /dev/null
+++ b/driver/spec.go
@@ -0,0 +1,73 @@
+package driver
+
+import (
+ "github.com/nihei9/maleeni/spec"
+)
+
+type lexSpec struct {
+ spec *spec.CompiledLexSpec
+}
+
+func NewLexSpec(spec *spec.CompiledLexSpec) *lexSpec {
+ return &lexSpec{
+ spec: spec,
+ }
+}
+
+func (s *lexSpec) InitialMode() ModeID {
+ return ModeID(s.spec.InitialModeID.Int())
+}
+
+func (s *lexSpec) Pop(mode ModeID, modeKind ModeKindID) bool {
+ return s.spec.Specs[mode].Pop[modeKind] == 1
+}
+
+func (s *lexSpec) Push(mode ModeID, modeKind ModeKindID) (ModeID, bool) {
+ modeID := s.spec.Specs[mode].Push[modeKind]
+ return ModeID(modeID.Int()), !modeID.IsNil()
+}
+
+func (s *lexSpec) ModeName(mode ModeID) string {
+ return s.spec.ModeNames[mode].String()
+}
+
+func (s *lexSpec) InitialState(mode ModeID) StateID {
+ return StateID(s.spec.Specs[mode].DFA.InitialStateID.Int())
+}
+
+func (s *lexSpec) NextState(mode ModeID, state StateID, v int) (StateID, bool) {
+ switch s.spec.CompressionLevel {
+ case 2:
+ tran := s.spec.Specs[mode].DFA.Transition
+ rowNum := tran.RowNums[state]
+ d := tran.UniqueEntries.RowDisplacement[rowNum]
+ if tran.UniqueEntries.Bounds[d+v] != rowNum {
+ return StateID(tran.UniqueEntries.EmptyValue.Int()), false
+ }
+ return StateID(tran.UniqueEntries.Entries[d+v].Int()), true
+ case 1:
+ tran := s.spec.Specs[mode].DFA.Transition
+ next := tran.UncompressedUniqueEntries[tran.RowNums[state]*tran.OriginalColCount+v]
+ if next == spec.StateIDNil {
+ return StateID(spec.StateIDNil.Int()), false
+ }
+ return StateID(next.Int()), true
+ }
+
+ modeSpec := s.spec.Specs[mode]
+ next := modeSpec.DFA.UncompressedTransition[state.Int()*modeSpec.DFA.ColCount+v]
+ if next == spec.StateIDNil {
+ return StateID(spec.StateIDNil), false
+ }
+ return StateID(next.Int()), true
+}
+
+func (s *lexSpec) Accept(mode ModeID, state StateID) (ModeKindID, bool) {
+ modeKindID := s.spec.Specs[mode].DFA.AcceptingStates[state]
+ return ModeKindID(modeKindID.Int()), modeKindID != spec.LexModeKindIDNil
+}
+
+func (s *lexSpec) KindIDAndName(mode ModeID, modeKind ModeKindID) (KindID, string) {
+ kindID := s.spec.KindIDs[mode][modeKind]
+ return KindID(kindID.Int()), s.spec.KindNames[kindID].String()
+}