author | Ryo Nihei <nihei.dev@gmail.com> | 2021-02-16 00:07:40 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-02-16 00:07:40 +0900 |
commit | c313f7870bd547534c7c7bb0ad01003ab9983b34 (patch) | |
tree | f1b6a443c4372f9eb69f314009d721703e208a0b /driver | |
parent | Add bracket expression matching specified character (diff) | |
download | tre-c313f7870bd547534c7c7bb0ad01003ab9983b34.tar.gz tre-c313f7870bd547534c7c7bb0ad01003ab9983b34.tar.xz | |
Add types of lexical specifications
The APIs of the compiler and driver packages now use these types. Because the CompiledLexSpec struct that a lexer takes holds the kind names of the lexical specification entries, the lexer can set those kind names on the tokens it generates.
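As a quick illustration (a sketch, not part of the commit), the snippet below mirrors the flow the updated tests exercise: build a spec.LexSpec from kind/pattern pairs, compile it, and hand the CompiledLexSpec to NewLexer. It is written as if it lived in the driver package, like lexer_test.go, so it can call the unexported next() shown in the diff; the expected output lines are illustrative only.

```go
package driver

import (
	"fmt"
	"strings"

	"github.com/nihei9/maleeni/compiler"
	"github.com/nihei9/maleeni/spec"
)

func Example_kindNames() {
	// Each entry pairs a kind name with a pattern. compiler.Compile turns
	// the spec into a CompiledLexSpec whose Kinds table the lexer uses to
	// stamp a kind name onto every token it emits.
	lspec := &spec.LexSpec{
		Entries: []*spec.LexEntry{
			spec.NewLexEntry("t1", "(a|b)*abb"),
			spec.NewLexEntry("t2", " *"),
		},
	}
	clspec, err := compiler.Compile(lspec)
	if err != nil {
		panic(err)
	}
	lex, err := NewLexer(clspec, strings.NewReader("abb aabb"))
	if err != nil {
		panic(err)
	}
	for {
		// next() is the unexported method shown in the diff; an exported
		// wrapper, if any, is outside this change.
		tok, err := lex.next()
		if err != nil {
			panic(err)
		}
		if tok.EOF {
			break
		}
		fmt.Printf("id=%d kind=%s match=%q\n", tok.ID, tok.Kind, tok.Match)
	}
	// Expected (illustrative):
	// id=1 kind=t1 match="abb"
	// id=2 kind=t2 match=" "
	// id=1 kind=t1 match="aabb"
}
```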
Diffstat (limited to 'driver')
-rw-r--r-- | driver/lexer.go | 30 |
-rw-r--r-- | driver/lexer_test.go | 127 |
2 files changed, 78 insertions, 79 deletions
diff --git a/driver/lexer.go b/driver/lexer.go
index 710d54d..3a6f039 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -5,19 +5,21 @@ import (
 	"io"
 	"io/ioutil"
 
-	"github.com/nihei9/maleeni/compiler"
+	"github.com/nihei9/maleeni/spec"
 )
 
 type Token struct {
 	ID      int
+	Kind    string
 	Match   []byte
 	EOF     bool
 	Invalid bool
 }
 
-func newToken(id int, match []byte) *Token {
+func newToken(id int, kind string, match []byte) *Token {
 	return &Token{
 		ID:    id,
+		Kind:  kind,
 		Match: match,
 	}
 }
@@ -38,21 +40,21 @@ func newInvalidToken(match []byte) *Token {
 }
 
 type lexer struct {
-	tranTab *compiler.TransitionTable
-	src     []byte
-	srcPtr  int
-	tokBuf  []*Token
+	clspec *spec.CompiledLexSpec
+	src    []byte
+	srcPtr int
+	tokBuf []*Token
 }
 
-func NewLexer(tranTab *compiler.TransitionTable, src io.Reader) (*lexer, error) {
+func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader) (*lexer, error) {
 	b, err := ioutil.ReadAll(src)
 	if err != nil {
 		return nil, err
 	}
 	return &lexer{
-		tranTab: tranTab,
-		src:     b,
-		srcPtr:  0,
+		clspec: clspec,
+		src:    b,
+		srcPtr: 0,
 	}, nil
 }
 
@@ -112,7 +114,7 @@ func (l *lexer) peekN(n int) (*Token, error) {
 }
 
 func (l *lexer) next() (*Token, error) {
-	state := l.tranTab.InitialState
+	state := l.clspec.DFA.InitialState
 	buf := []byte{}
 	unfixedBufLen := 0
 	var tok *Token
@@ -127,7 +129,7 @@ func (l *lexer) next() (*Token, error) {
 		}
 		buf = append(buf, v)
 		unfixedBufLen++
-		entry := l.tranTab.Transition[state]
+		entry := l.clspec.DFA.Transition[state]
 		if len(entry) == 0 {
 			return nil, fmt.Errorf("no transition entry; state: %v", state)
 		}
@@ -140,9 +142,9 @@ func (l *lexer) next() (*Token, error) {
 			return newInvalidToken(buf), nil
 		}
 		state = nextState
-		id, ok := l.tranTab.AcceptingStates[state]
+		id, ok := l.clspec.DFA.AcceptingStates[state]
 		if ok {
-			tok = newToken(id, buf)
+			tok = newToken(id, l.clspec.Kinds[id], buf)
 			unfixedBufLen = 0
 		}
 	}
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 1c8d627..133b758 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -6,38 +6,43 @@ import (
 	"testing"
 
 	"github.com/nihei9/maleeni/compiler"
+	"github.com/nihei9/maleeni/spec"
 )
 
 func TestLexer_Next(t *testing.T) {
 	test := []struct {
-		regexps [][]byte
-		src     string
-		tokens  []*Token
+		lspec  *spec.LexSpec
+		src    string
+		tokens []*Token
 	}{
 		{
-			regexps: [][]byte{
-				[]byte("(a|b)*abb"),
-				[]byte(" *"),
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					spec.NewLexEntry("t1", "(a|b)*abb"),
+					spec.NewLexEntry("t2", " *"),
+				},
 			},
 			src: "abb aabb aaabb babb bbabb abbbabb",
 			tokens: []*Token{
-				newToken(1, []byte("abb")),
-				newToken(2, []byte(" ")),
-				newToken(1, []byte("aabb")),
-				newToken(2, []byte(" ")),
-				newToken(1, []byte("aaabb")),
-				newToken(2, []byte(" ")),
-				newToken(1, []byte("babb")),
-				newToken(2, []byte(" ")),
-				newToken(1, []byte("bbabb")),
-				newToken(2, []byte(" ")),
-				newToken(1, []byte("abbbabb")),
+				newToken(1, "t1", []byte("abb")),
+				newToken(2, "t2", []byte(" ")),
+				newToken(1, "t1", []byte("aabb")),
+				newToken(2, "t2", []byte(" ")),
+				newToken(1, "t1", []byte("aaabb")),
+				newToken(2, "t2", []byte(" ")),
+				newToken(1, "t1", []byte("babb")),
+				newToken(2, "t2", []byte(" ")),
+				newToken(1, "t1", []byte("bbabb")),
+				newToken(2, "t2", []byte(" ")),
+				newToken(1, "t1", []byte("abbbabb")),
 				newEOFToken(),
 			},
 		},
 		{
-			regexps: [][]byte{
-				[]byte("."),
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					spec.NewLexEntry("t1", "."),
+				},
 			},
 			src: string([]byte{
 				0x00,
@@ -58,58 +63,52 @@ func TestLexer_Next(t *testing.T) {
 				0xf4, 0x8f, 0xbf, 0xbf,
 			}),
 			tokens: []*Token{
-				newToken(1, []byte{0x00}),
-				newToken(1, []byte{0x7f}),
-				newToken(1, []byte{0xc2, 0x80}),
-				newToken(1, []byte{0xdf, 0xbf}),
-				newToken(1, []byte{0xe1, 0x80, 0x80}),
-				newToken(1, []byte{0xec, 0xbf, 0xbf}),
-				newToken(1, []byte{0xed, 0x80, 0x80}),
-				newToken(1, []byte{0xed, 0x9f, 0xbf}),
-				newToken(1, []byte{0xee, 0x80, 0x80}),
-				newToken(1, []byte{0xef, 0xbf, 0xbf}),
-				newToken(1, []byte{0xf0, 0x90, 0x80, 0x80}),
-				newToken(1, []byte{0xf0, 0xbf, 0xbf, 0xbf}),
-				newToken(1, []byte{0xf1, 0x80, 0x80, 0x80}),
-				newToken(1, []byte{0xf3, 0xbf, 0xbf, 0xbf}),
-				newToken(1, []byte{0xf4, 0x80, 0x80, 0x80}),
-				newToken(1, []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+				newToken(1, "t1", []byte{0x00}),
+				newToken(1, "t1", []byte{0x7f}),
+				newToken(1, "t1", []byte{0xc2, 0x80}),
+				newToken(1, "t1", []byte{0xdf, 0xbf}),
+				newToken(1, "t1", []byte{0xe1, 0x80, 0x80}),
+				newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}),
+				newToken(1, "t1", []byte{0xed, 0x80, 0x80}),
+				newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}),
+				newToken(1, "t1", []byte{0xee, 0x80, 0x80}),
+				newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}),
+				newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
+				newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+				newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
+				newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+				newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
+				newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
 				newEOFToken(),
 			},
 		},
 		{
-			regexps: [][]byte{
-				[]byte("[ab.*|()[\\]]"),
+			lspec: &spec.LexSpec{
+				Entries: []*spec.LexEntry{
+					spec.NewLexEntry("t1", "[ab.*|()[\\]]"),
+				},
 			},
 			src: "ab.*|()[]",
 			tokens: []*Token{
-				newToken(1, []byte("a")),
-				newToken(1, []byte("b")),
-				newToken(1, []byte(".")),
-				newToken(1, []byte("*")),
-				newToken(1, []byte("|")),
-				newToken(1, []byte("(")),
-				newToken(1, []byte(")")),
-				newToken(1, []byte("[")),
-				newToken(1, []byte("]")),
+				newToken(1, "t1", []byte("a")),
+				newToken(1, "t1", []byte("b")),
+				newToken(1, "t1", []byte(".")),
+				newToken(1, "t1", []byte("*")),
+				newToken(1, "t1", []byte("|")),
+				newToken(1, "t1", []byte("(")),
+				newToken(1, "t1", []byte(")")),
+				newToken(1, "t1", []byte("[")),
+				newToken(1, "t1", []byte("]")),
 				newEOFToken(),
 			},
 		},
 	}
 	for _, tt := range test {
-		res := map[int][]byte{}
-		for i, re := range tt.regexps {
-			res[i+1] = re
-		}
-		dfa, err := compiler.Compile(res)
-		if err != nil {
-			t.Fatalf("unexpected error occurred: %v", err)
-		}
-		tranTab, err := compiler.GenTransitionTable(dfa)
+		clspec, err := compiler.Compile(tt.lspec)
 		if err != nil {
 			t.Fatalf("unexpected error occurred: %v", err)
 		}
-		lexer, err := NewLexer(tranTab, strings.NewReader(tt.src))
+		lexer, err := NewLexer(clspec, strings.NewReader(tt.src))
 		if err != nil {
 			t.Fatalf("unexpecated error occurred; %v", err)
 		}
@@ -129,18 +128,16 @@ func TestLexer_Next(t *testing.T) {
 }
 
 func TestLexer_PeekN(t *testing.T) {
-	dfa, err := compiler.Compile(map[int][]byte{
-		1: []byte("foo"),
-		2: []byte("bar"),
+	clspec, err := compiler.Compile(&spec.LexSpec{
+		Entries: []*spec.LexEntry{
+			spec.NewLexEntry("", "foo"),
+			spec.NewLexEntry("", "bar"),
+		},
 	})
 	if err != nil {
 		t.Fatalf("unexpected error occurred: %v", err)
 	}
-	tranTab, err := compiler.GenTransitionTable(dfa)
-	if err != nil {
-		t.Fatalf("unexpected error occurred: %v", err)
-	}
-	lex, err := NewLexer(tranTab, strings.NewReader("foobar"))
+	lex, err := NewLexer(clspec, strings.NewReader("foobar"))
 	if err != nil {
 		t.Fatalf("unexpected error occurred: %v", err)
 	}
@@ -201,7 +198,7 @@ func TestLexer_PeekN(t *testing.T) {
 
 func testToken(t *testing.T, expected, actual *Token) {
 	t.Helper()
-	if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
+	if actual.ID != expected.ID || actual.Kind != expected.Kind || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
 		t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match))
 	}
 }
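For orientation, here is a rough sketch of the spec types this commit leans on, inferred only from how driver/lexer.go and the tests use them (clspec.Kinds[id], clspec.DFA.InitialState, clspec.DFA.Transition[state], clspec.DFA.AcceptingStates[state], spec.NewLexEntry). The real definitions live in the spec package, outside this driver-limited diff, so every name and field type below is an assumption.

```go
package spec

// LexEntry pairs a kind name with a pattern; NewLexEntry is the constructor
// the updated tests call. Field names are assumed.
type LexEntry struct {
	Kind    string
	Pattern string
}

func NewLexEntry(kind string, pattern string) *LexEntry {
	return &LexEntry{Kind: kind, Pattern: pattern}
}

// LexSpec is the uncompiled specification handed to compiler.Compile.
type LexSpec struct {
	Entries []*LexEntry
}

// CompiledLexSpec is what the lexer consumes. Kinds is indexed by token ID,
// which is how newToken(id, l.clspec.Kinds[id], buf) attaches a kind name.
type CompiledLexSpec struct {
	Kinds []string
	DFA   *DFA
}

// DFA mirrors the fields the lexer reads; the concrete element types
// (slice vs. map) are guesses consistent with the accesses in the diff.
type DFA struct {
	InitialState    int
	Transition      [][]int     // per-state rows indexed by input byte
	AcceptingStates map[int]int // accepting state -> token ID
}
```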