aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--compiler/compiler.go26
-rw-r--r--compiler/dfa.go12
-rw-r--r--driver/lexer.go30
-rw-r--r--driver/lexer_test.go127
-rw-r--r--spec/spec.go28
5 files changed, 133 insertions, 90 deletions
diff --git a/compiler/compiler.go b/compiler/compiler.go
index 153ad77..3ae647e 100644
--- a/compiler/compiler.go
+++ b/compiler/compiler.go
@@ -1,9 +1,29 @@
package compiler
-func Compile(regexps map[int][]byte) (*DFA, error) {
- root, symTab, err := parse(regexps)
+import "github.com/nihei9/maleeni/spec"
+
+func Compile(lexspec *spec.LexSpec) (*spec.CompiledLexSpec, error) {
+ var kinds []string
+ var patterns map[int][]byte
+ {
+ kinds = append(kinds, "")
+ patterns = map[int][]byte{}
+ for i, e := range lexspec.Entries {
+ kinds = append(kinds, e.Kind)
+ patterns[i+1] = []byte(e.Pattern)
+ }
+ }
+ root, symTab, err := parse(patterns)
+ if err != nil {
+ return nil, err
+ }
+ dfa := genDFA(root, symTab)
+ tranTab, err := genTransitionTable(dfa)
if err != nil {
return nil, err
}
- return genDFA(root, symTab), nil
+ return &spec.CompiledLexSpec{
+ Kinds: kinds,
+ DFA: tranTab,
+ }, nil
}
diff --git a/compiler/dfa.go b/compiler/dfa.go
index fec93ce..b07954f 100644
--- a/compiler/dfa.go
+++ b/compiler/dfa.go
@@ -2,6 +2,8 @@ package compiler
import (
"sort"
+
+ "github.com/nihei9/maleeni/spec"
)
type DFA struct {
@@ -99,13 +101,7 @@ func genDFA(root astNode, symTab *symbolTable) *DFA {
}
}
-type TransitionTable struct {
- InitialState int `json:"initial_state"`
- AcceptingStates map[int]int `json:"accepting_states"`
- Transition [][]int `json:"transition"`
-}
-
-func GenTransitionTable(dfa *DFA) (*TransitionTable, error) {
+func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) {
state2Num := map[string]int{}
for i, s := range dfa.States {
state2Num[s] = i + 1
@@ -125,7 +121,7 @@ func GenTransitionTable(dfa *DFA) (*TransitionTable, error) {
tran[state2Num[s]] = entry
}
- return &TransitionTable{
+ return &spec.TransitionTable{
InitialState: state2Num[dfa.InitialState],
AcceptingStates: acc,
Transition: tran,
diff --git a/driver/lexer.go b/driver/lexer.go
index 710d54d..3a6f039 100644
--- a/driver/lexer.go
+++ b/driver/lexer.go
@@ -5,19 +5,21 @@ import (
"io"
"io/ioutil"
- "github.com/nihei9/maleeni/compiler"
+ "github.com/nihei9/maleeni/spec"
)
type Token struct {
ID int
+ Kind string
Match []byte
EOF bool
Invalid bool
}
-func newToken(id int, match []byte) *Token {
+func newToken(id int, kind string, match []byte) *Token {
return &Token{
ID: id,
+ Kind: kind,
Match: match,
}
}
@@ -38,21 +40,21 @@ func newInvalidToken(match []byte) *Token {
}
type lexer struct {
- tranTab *compiler.TransitionTable
- src []byte
- srcPtr int
- tokBuf []*Token
+ clspec *spec.CompiledLexSpec
+ src []byte
+ srcPtr int
+ tokBuf []*Token
}
-func NewLexer(tranTab *compiler.TransitionTable, src io.Reader) (*lexer, error) {
+func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader) (*lexer, error) {
b, err := ioutil.ReadAll(src)
if err != nil {
return nil, err
}
return &lexer{
- tranTab: tranTab,
- src: b,
- srcPtr: 0,
+ clspec: clspec,
+ src: b,
+ srcPtr: 0,
}, nil
}
@@ -112,7 +114,7 @@ func (l *lexer) peekN(n int) (*Token, error) {
}
func (l *lexer) next() (*Token, error) {
- state := l.tranTab.InitialState
+ state := l.clspec.DFA.InitialState
buf := []byte{}
unfixedBufLen := 0
var tok *Token
@@ -127,7 +129,7 @@ func (l *lexer) next() (*Token, error) {
}
buf = append(buf, v)
unfixedBufLen++
- entry := l.tranTab.Transition[state]
+ entry := l.clspec.DFA.Transition[state]
if len(entry) == 0 {
return nil, fmt.Errorf("no transition entry; state: %v", state)
}
@@ -140,9 +142,9 @@ func (l *lexer) next() (*Token, error) {
return newInvalidToken(buf), nil
}
state = nextState
- id, ok := l.tranTab.AcceptingStates[state]
+ id, ok := l.clspec.DFA.AcceptingStates[state]
if ok {
- tok = newToken(id, buf)
+ tok = newToken(id, l.clspec.Kinds[id], buf)
unfixedBufLen = 0
}
}
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
index 1c8d627..133b758 100644
--- a/driver/lexer_test.go
+++ b/driver/lexer_test.go
@@ -6,38 +6,43 @@ import (
"testing"
"github.com/nihei9/maleeni/compiler"
+ "github.com/nihei9/maleeni/spec"
)
func TestLexer_Next(t *testing.T) {
test := []struct {
- regexps [][]byte
- src string
- tokens []*Token
+ lspec *spec.LexSpec
+ src string
+ tokens []*Token
}{
{
- regexps: [][]byte{
- []byte("(a|b)*abb"),
- []byte(" *"),
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("t1", "(a|b)*abb"),
+ spec.NewLexEntry("t2", " *"),
+ },
},
src: "abb aabb aaabb babb bbabb abbbabb",
tokens: []*Token{
- newToken(1, []byte("abb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("aabb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("aaabb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("babb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("bbabb")),
- newToken(2, []byte(" ")),
- newToken(1, []byte("abbbabb")),
+ newToken(1, "t1", []byte("abb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("aabb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("aaabb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("babb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("bbabb")),
+ newToken(2, "t2", []byte(" ")),
+ newToken(1, "t1", []byte("abbbabb")),
newEOFToken(),
},
},
{
- regexps: [][]byte{
- []byte("."),
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("t1", "."),
+ },
},
src: string([]byte{
0x00,
@@ -58,58 +63,52 @@ func TestLexer_Next(t *testing.T) {
0xf4, 0x8f, 0xbf, 0xbf,
}),
tokens: []*Token{
- newToken(1, []byte{0x00}),
- newToken(1, []byte{0x7f}),
- newToken(1, []byte{0xc2, 0x80}),
- newToken(1, []byte{0xdf, 0xbf}),
- newToken(1, []byte{0xe1, 0x80, 0x80}),
- newToken(1, []byte{0xec, 0xbf, 0xbf}),
- newToken(1, []byte{0xed, 0x80, 0x80}),
- newToken(1, []byte{0xed, 0x9f, 0xbf}),
- newToken(1, []byte{0xee, 0x80, 0x80}),
- newToken(1, []byte{0xef, 0xbf, 0xbf}),
- newToken(1, []byte{0xf0, 0x90, 0x80, 0x80}),
- newToken(1, []byte{0xf0, 0xbf, 0xbf, 0xbf}),
- newToken(1, []byte{0xf1, 0x80, 0x80, 0x80}),
- newToken(1, []byte{0xf3, 0xbf, 0xbf, 0xbf}),
- newToken(1, []byte{0xf4, 0x80, 0x80, 0x80}),
- newToken(1, []byte{0xf4, 0x8f, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0x00}),
+ newToken(1, "t1", []byte{0x7f}),
+ newToken(1, "t1", []byte{0xc2, 0x80}),
+ newToken(1, "t1", []byte{0xdf, 0xbf}),
+ newToken(1, "t1", []byte{0xe1, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xed, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}),
+ newToken(1, "t1", []byte{0xee, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}),
+ newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}),
+ newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}),
newEOFToken(),
},
},
{
- regexps: [][]byte{
- []byte("[ab.*|()[\\]]"),
+ lspec: &spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("t1", "[ab.*|()[\\]]"),
+ },
},
src: "ab.*|()[]",
tokens: []*Token{
- newToken(1, []byte("a")),
- newToken(1, []byte("b")),
- newToken(1, []byte(".")),
- newToken(1, []byte("*")),
- newToken(1, []byte("|")),
- newToken(1, []byte("(")),
- newToken(1, []byte(")")),
- newToken(1, []byte("[")),
- newToken(1, []byte("]")),
+ newToken(1, "t1", []byte("a")),
+ newToken(1, "t1", []byte("b")),
+ newToken(1, "t1", []byte(".")),
+ newToken(1, "t1", []byte("*")),
+ newToken(1, "t1", []byte("|")),
+ newToken(1, "t1", []byte("(")),
+ newToken(1, "t1", []byte(")")),
+ newToken(1, "t1", []byte("[")),
+ newToken(1, "t1", []byte("]")),
newEOFToken(),
},
},
}
for _, tt := range test {
- res := map[int][]byte{}
- for i, re := range tt.regexps {
- res[i+1] = re
- }
- dfa, err := compiler.Compile(res)
- if err != nil {
- t.Fatalf("unexpected error occurred: %v", err)
- }
- tranTab, err := compiler.GenTransitionTable(dfa)
+ clspec, err := compiler.Compile(tt.lspec)
if err != nil {
t.Fatalf("unexpected error occurred: %v", err)
}
- lexer, err := NewLexer(tranTab, strings.NewReader(tt.src))
+ lexer, err := NewLexer(clspec, strings.NewReader(tt.src))
if err != nil {
		t.Fatalf("unexpected error occurred; %v", err)
}
@@ -129,18 +128,16 @@ func TestLexer_Next(t *testing.T) {
}
func TestLexer_PeekN(t *testing.T) {
- dfa, err := compiler.Compile(map[int][]byte{
- 1: []byte("foo"),
- 2: []byte("bar"),
+ clspec, err := compiler.Compile(&spec.LexSpec{
+ Entries: []*spec.LexEntry{
+ spec.NewLexEntry("", "foo"),
+ spec.NewLexEntry("", "bar"),
+ },
})
if err != nil {
t.Fatalf("unexpected error occurred: %v", err)
}
- tranTab, err := compiler.GenTransitionTable(dfa)
- if err != nil {
- t.Fatalf("unexpected error occurred: %v", err)
- }
- lex, err := NewLexer(tranTab, strings.NewReader("foobar"))
+ lex, err := NewLexer(clspec, strings.NewReader("foobar"))
if err != nil {
t.Fatalf("unexpected error occurred: %v", err)
}
@@ -201,7 +198,7 @@ func TestLexer_PeekN(t *testing.T) {
func testToken(t *testing.T, expected, actual *Token) {
t.Helper()
- if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
+ if actual.ID != expected.ID || actual.Kind != expected.Kind || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match))
}
}
diff --git a/spec/spec.go b/spec/spec.go
new file mode 100644
index 0000000..d827b68
--- /dev/null
+++ b/spec/spec.go
@@ -0,0 +1,28 @@
+package spec
+
+type LexEntry struct {
+ Kind string `json:"kind"`
+ Pattern string `json:"pattern"`
+}
+
+func NewLexEntry(kind string, pattern string) *LexEntry {
+ return &LexEntry{
+ Kind: kind,
+ Pattern: pattern,
+ }
+}
+
+type LexSpec struct {
+ Entries []*LexEntry `json:"entries"`
+}
+
+type TransitionTable struct {
+ InitialState int `json:"initial_state"`
+ AcceptingStates map[int]int `json:"accepting_states"`
+ Transition [][]int `json:"transition"`
+}
+
+type CompiledLexSpec struct {
+ Kinds []string `json:"kinds"`
+ DFA *TransitionTable `json:"dfa"`
+}