author    Ryo Nihei <nihei.dev@gmail.com>    2021-02-16 00:07:40 +0900
committer Ryo Nihei <nihei.dev@gmail.com>    2021-02-16 00:07:40 +0900
commit    c313f7870bd547534c7c7bb0ad01003ab9983b34 (patch)
tree      f1b6a443c4372f9eb69f314009d721703e208a0b /driver
parent    Add bracket expression matching specified character (diff)
download  tre-c313f7870bd547534c7c7bb0ad01003ab9983b34.tar.gz
          tre-c313f7870bd547534c7c7bb0ad01003ab9983b34.tar.xz
Add types of lexical specifications
The APIs of the compiler and driver packages now use these types. Because the CompiledLexSpec struct that a lexer takes contains the kind names of the lexical specification entries, the lexer can set those kind names on the tokens it generates.
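
As a rough sketch of the new API shape (written as if it lived inside the driver package, since this diff only exposes the unexported next method; the entry names, patterns, input string, and the helper function name are purely illustrative), compiling a spec.LexSpec and reading the kind names off the resulting tokens looks roughly like this:

package driver

import (
	"fmt"
	"strings"

	"github.com/nihei9/maleeni/compiler"
	"github.com/nihei9/maleeni/spec"
)

// sketchKindLookup is a hypothetical helper showing how the kind names
// declared in a LexSpec end up on the tokens the lexer produces.
func sketchKindLookup() error {
	lspec := &spec.LexSpec{
		Entries: []*spec.LexEntry{
			spec.NewLexEntry("t1", "(a|b)*abb"),
			spec.NewLexEntry("t2", " *"),
		},
	}

	// Compile the spec; the resulting CompiledLexSpec carries the DFA and
	// the kind names that the lexer copies onto tokens.
	clspec, err := compiler.Compile(lspec)
	if err != nil {
		return err
	}

	lex, err := NewLexer(clspec, strings.NewReader("abb aabb"))
	if err != nil {
		return err
	}

	for {
		tok, err := lex.next()
		if err != nil {
			return err
		}
		if tok.EOF {
			break
		}
		// e.g. t1 "abb", t2 " ", t1 "aabb"
		fmt.Printf("%v %q\n", tok.Kind, tok.Match)
	}
	return nil
}
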
Diffstat (limited to 'driver')
-rw-r--r--  driver/lexer.go       30
-rw-r--r--  driver/lexer_test.go  127
2 files changed, 78 insertions(+), 79 deletions(-)
diff --git a/driver/lexer.go b/driver/lexer.go
index 710d54d..3a6f039 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -5,19 +5,21 @@ import (
"io"
"io/ioutil"
- "github.com/nihei9/maleeni/compiler"
+ "github.com/nihei9/maleeni/spec"
)
type Token struct {
ID int
+ Kind string
Match []byte
EOF bool
Invalid bool
}
-func newToken(id int, match []byte) *Token {
+func newToken(id int, kind string, match []byte) *Token {
return &Token{
ID: id,
+ Kind: kind,
Match: match,
}
}
@@ -38,21 +40,21 @@ func newInvalidToken(match []byte) *Token {
}
type lexer struct {
- tranTab *compiler.TransitionTable
- src []byte
- srcPtr int
- tokBuf []*Token
+ clspec *spec.CompiledLexSpec
+ src []byte
+ srcPtr int
+ tokBuf []*Token
}
-func NewLexer(tranTab *compiler.TransitionTable, src io.Reader) (*lexer, error) {
+func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader) (*lexer, error) {
b, err := ioutil.ReadAll(src)
if err != nil {
return nil, err
}
return &lexer{
- tranTab: tranTab,
- src: b,
- srcPtr: 0,
+ clspec: clspec,
+ src: b,
+ srcPtr: 0,
}, nil
}
@@ -112,7 +114,7 @@ func (l *lexer) peekN(n int) (*Token, error) {
}
func (l *lexer) next() (*Token, error) {
- state := l.tranTab.InitialState
+ state := l.clspec.DFA.InitialState
buf := []byte{}
unfixedBufLen := 0
var tok *Token
@@ -127,7 +129,7 @@ func (l *lexer) next() (*Token, error) {
}
buf = append(buf, v)
unfixedBufLen++
- entry := l.tranTab.Transition[state]
+ entry := l.clspec.DFA.Transition[state]
if len(entry) == 0 {
return nil, fmt.Errorf("no transition entry; state: %v", state)
}
@@ -140,9 +142,9 @@ func (l *lexer) next() (*Token, error) {
return newInvalidToken(buf), nil
}
state = nextState
- id, ok := l.tranTab.AcceptingStates[state]
+ id, ok := l.clspec.DFA.AcceptingStates[state]
if ok {
- tok = newToken(id, buf)
+ tok = newToken(id, l.clspec.Kinds[id], buf)
unfixedBufLen = 0
}
}
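
For reference, the shape of spec.CompiledLexSpec that the driver relies on can be inferred from the field accesses above. The sketch below is only a guess at those definitions (the authoritative ones live in the spec package); the element types in particular are assumptions:

// Assumed shape of the spec package types, reconstructed from how
// driver/lexer.go indexes them; not the actual definitions.
type CompiledLexSpec struct {
	Kinds []string // Kinds[id] is the kind name copied onto matching tokens
	DFA   *DFA
}

type DFA struct {
	InitialState    int
	AcceptingStates map[int]int // accepting state -> lexical entry ID
	Transition      [][]int     // Transition[state] is indexed by the input byte
}
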
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 1c8d627..133b758 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -6,38 +6,43 @@ import (
"testing"
"github.com/nihei9/maleeni/compiler"
+ "github.com/nihei9/maleeni/spec"
)
func TestLexer_Next(t *testing.T) {
test := []struct {
- regexps [][]byte
- src string
- tokens []*Token
+ lspec *spec.LexSpec
+ src string
+ tokens []*Token
}{
{
- regexps: [][]byte{
- []byte("(a|b)*abb"),
- []byte(" *"),
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("t1", "(a|b)*abb"),
+ spec.NewLexEntry("t2", " *"),
+ },
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newToken(1, []byte("abb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("aabb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("aaabb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("babb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("bbabb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("abbbabb")),
+ newToken(1, "t1", []byte("abb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("aabb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("aaabb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("babb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("bbabb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("abbbabb")),
newEOFToken(),
},
},
{
- regexps: [][]byte{
- []byte("."),
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("t1", "."),
+ },
},
src: string([]byte{
0x00,
@@ -58,58 +63,52 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, []byte{0x00}),
- newToken(1, []byte{0x7f}),
- newToken(1, []byte{0xc2, 0x80}),
- newToken(1, []byte{0xdf, 0xbf}),
- newToken(1, []byte{0xe1, 0x80, 0x80}),
- newToken(1, []byte{0xec, 0xbf, 0xbf}),
- newToken(1, []byte{0xed, 0x80, 0x80}),
- newToken(1, []byte{0xed, 0x9f, 0xbf}),
- newToken(1, []byte{0xee, 0x80, 0x80}),
- newToken(1, []byte{0xef, 0xbf, 0xbf}),
- newToken(1, []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, []byte{0xf0, 0xbf, 0xbf, 0xbf}),
- newToken(1, []byte{0xf1, 0x80, 0x80, 0x80}),
- newToken(1, []byte{0xf3, 0xbf, 0xbf, 0xbf}),
- newToken(1, []byte{0xf4, 0x80, 0x80, 0x80}),
- newToken(1, []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0x00}),
+ newToken(1, "t1", []byte{0x7f}),
+ newToken(1, "t1", []byte{0xc2, 0x80}),
+ newToken(1, "t1", []byte{0xdf, 0xbf}),
+ newToken(1, "t1", []byte{0xe1, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xed, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}),
+ newToken(1, "t1", []byte{0xee, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFToken(),
},
},
{
- regexps: [][]byte{
- []byte("[ab.*|()[\\]]"),
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("t1", "[ab.*|()[\\]]"),
+ },
},
src: "ab.*|()[]",
tokens: []*Token{
- newToken(1, []byte("a")),
- newToken(1, []byte("b")),
- newToken(1, []byte(".")),
- newToken(1, []byte("*")),
- newToken(1, []byte("|")),
- newToken(1, []byte("(")),
- newToken(1, []byte(")")),
- newToken(1, []byte("[")),
- newToken(1, []byte("]")),
+ newToken(1, "t1", []byte("a")),
+ newToken(1, "t1", []byte("b")),
+ newToken(1, "t1", []byte(".")),
+ newToken(1, "t1", []byte("*")),
+ newToken(1, "t1", []byte("|")),
+ newToken(1, "t1", []byte("(")),
+ newToken(1, "t1", []byte(")")),
+ newToken(1, "t1", []byte("[")),
+ newToken(1, "t1", []byte("]")),
newEOFToken(),
},
},
}
for _, tt := range test {
- res := map[int][]byte{}
- for i, re := range tt.regexps {
- res[i+1] = re
- }
- dfa, err := compiler.Compile(res)
- if err != nil {
- t.Fatalf("unexpected error occurred: %v", err)
- }
- tranTab, err := compiler.GenTransitionTable(dfa)
+ clspec, err := compiler.Compile(tt.lspec)
if err != nil {
t.Fatalf("unexpected error occurred: %v", err)
}
- lexer, err := NewLexer(tranTab, strings.NewReader(tt.src))
+ lexer, err := NewLexer(clspec, strings.NewReader(tt.src))
if err != nil {
t.Fatalf("unexpecated error occurred; %v", err)
}
@@ -129,18 +128,16 @@ func TestLexer_Next(t *testing.T) {
}
func TestLexer_PeekN(t *testing.T) {
- dfa, err := compiler.Compile(map[int][]byte{
- 1: []byte("foo"),
- 2: []byte("bar"),
+ clspec, err := compiler.Compile(&spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("", "foo"),
+ spec.NewLexEntry("", "bar"),
+ },
})
if err != nil {
t.Fatalf("unexpected error occurred: %v", err)
}
- tranTab, err := compiler.GenTransitionTable(dfa)
- if err != nil {
- t.Fatalf("unexpected error occurred: %v", err)
- }
- lex, err := NewLexer(tranTab, strings.NewReader("foobar"))
+ lex, err := NewLexer(clspec, strings.NewReader("foobar"))
if err != nil {
t.Fatalf("unexpected error occurred: %v", err)
}
@@ -201,7 +198,7 @@ func TestLexer_PeekN(t *testing.T) {
func testToken(t *testing.T, expected, actual *Token) {
t.Helper()
- if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
+ if actual.ID != expected.ID || actual.Kind != expected.Kind || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match))
}
}