diff options
-rw-r--r-- | compiler/compiler.go | 26 | ||||
-rw-r--r-- | compiler/dfa.go | 12 | ||||
-rw-r--r-- | driver/lexer.go | 30 | ||||
-rw-r--r-- | driver/lexer_test.go | 127 | ||||
-rw-r--r-- | spec/spec.go | 28 |
5 files changed, 133 insertions, 90 deletions
diff --git a/compiler/compiler.go b/compiler/compiler.go index 153ad77..3ae647e 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -1,9 +1,29 @@ package compiler -func Compile(regexps map[int][]byte) (*DFA, error) { - root, symTab, err := parse(regexps) +import "github.com/nihei9/maleeni/spec" + +func Compile(lexspec *spec.LexSpec) (*spec.CompiledLexSpec, error) { + var kinds []string + var patterns map[int][]byte + { + kinds = append(kinds, "") + patterns = map[int][]byte{} + for i, e := range lexspec.Entries { + kinds = append(kinds, e.Kind) + patterns[i+1] = []byte(e.Pattern) + } + } + root, symTab, err := parse(patterns) + if err != nil { + return nil, err + } + dfa := genDFA(root, symTab) + tranTab, err := genTransitionTable(dfa) if err != nil { return nil, err } - return genDFA(root, symTab), nil + return &spec.CompiledLexSpec{ + Kinds: kinds, + DFA: tranTab, + }, nil } diff --git a/compiler/dfa.go b/compiler/dfa.go index fec93ce..b07954f 100644 --- a/compiler/dfa.go +++ b/compiler/dfa.go @@ -2,6 +2,8 @@ package compiler import ( "sort" + + "github.com/nihei9/maleeni/spec" ) type DFA struct { @@ -99,13 +101,7 @@ func genDFA(root astNode, symTab *symbolTable) *DFA { } } -type TransitionTable struct { - InitialState int `json:"initial_state"` - AcceptingStates map[int]int `json:"accepting_states"` - Transition [][]int `json:"transition"` -} - -func GenTransitionTable(dfa *DFA) (*TransitionTable, error) { +func genTransitionTable(dfa *DFA) (*spec.TransitionTable, error) { state2Num := map[string]int{} for i, s := range dfa.States { state2Num[s] = i + 1 @@ -125,7 +121,7 @@ func GenTransitionTable(dfa *DFA) (*TransitionTable, error) { tran[state2Num[s]] = entry } - return &TransitionTable{ + return &spec.TransitionTable{ InitialState: state2Num[dfa.InitialState], AcceptingStates: acc, Transition: tran, diff --git a/driver/lexer.go b/driver/lexer.go index 710d54d..3a6f039 100644 --- a/driver/lexer.go +++ b/driver/lexer.go @@ -5,19 +5,21 @@ import ( "io" "io/ioutil" - "github.com/nihei9/maleeni/compiler" + "github.com/nihei9/maleeni/spec" ) type Token struct { ID int + Kind string Match []byte EOF bool Invalid bool } -func newToken(id int, match []byte) *Token { +func newToken(id int, kind string, match []byte) *Token { return &Token{ ID: id, + Kind: kind, Match: match, } } @@ -38,21 +40,21 @@ func newInvalidToken(match []byte) *Token { } type lexer struct { - tranTab *compiler.TransitionTable - src []byte - srcPtr int - tokBuf []*Token + clspec *spec.CompiledLexSpec + src []byte + srcPtr int + tokBuf []*Token } -func NewLexer(tranTab *compiler.TransitionTable, src io.Reader) (*lexer, error) { +func NewLexer(clspec *spec.CompiledLexSpec, src io.Reader) (*lexer, error) { b, err := ioutil.ReadAll(src) if err != nil { return nil, err } return &lexer{ - tranTab: tranTab, - src: b, - srcPtr: 0, + clspec: clspec, + src: b, + srcPtr: 0, }, nil } @@ -112,7 +114,7 @@ func (l *lexer) peekN(n int) (*Token, error) { } func (l *lexer) next() (*Token, error) { - state := l.tranTab.InitialState + state := l.clspec.DFA.InitialState buf := []byte{} unfixedBufLen := 0 var tok *Token @@ -127,7 +129,7 @@ func (l *lexer) next() (*Token, error) { } buf = append(buf, v) unfixedBufLen++ - entry := l.tranTab.Transition[state] + entry := l.clspec.DFA.Transition[state] if len(entry) == 0 { return nil, fmt.Errorf("no transition entry; state: %v", state) } @@ -140,9 +142,9 @@ func (l *lexer) next() (*Token, error) { return newInvalidToken(buf), nil } state = nextState - id, ok := l.tranTab.AcceptingStates[state] + id, ok := l.clspec.DFA.AcceptingStates[state] if ok { - tok = newToken(id, buf) + tok = newToken(id, l.clspec.Kinds[id], buf) unfixedBufLen = 0 } } diff --git a/driver/lexer_test.go b/driver/lexer_test.go index 1c8d627..133b758 100644 --- a/driver/lexer_test.go +++ b/driver/lexer_test.go @@ -6,38 +6,43 @@ import ( "testing" "github.com/nihei9/maleeni/compiler" + "github.com/nihei9/maleeni/spec" ) func TestLexer_Next(t *testing.T) { test := []struct { - regexps [][]byte - src string - tokens []*Token + lspec *spec.LexSpec + src string + tokens []*Token }{ { - regexps: [][]byte{ - []byte("(a|b)*abb"), - []byte(" *"), + lspec: &spec.LexSpec{ + Entries: []*spec.LexEntry{ + spec.NewLexEntry("t1", "(a|b)*abb"), + spec.NewLexEntry("t2", " *"), + }, }, src: "abb aabb aaabb babb bbabb abbbabb", tokens: []*Token{ - newToken(1, []byte("abb")), - newToken(2, []byte(" ")), - newToken(1, []byte("aabb")), - newToken(2, []byte(" ")), - newToken(1, []byte("aaabb")), - newToken(2, []byte(" ")), - newToken(1, []byte("babb")), - newToken(2, []byte(" ")), - newToken(1, []byte("bbabb")), - newToken(2, []byte(" ")), - newToken(1, []byte("abbbabb")), + newToken(1, "t1", []byte("abb")), + newToken(2, "t2", []byte(" ")), + newToken(1, "t1", []byte("aabb")), + newToken(2, "t2", []byte(" ")), + newToken(1, "t1", []byte("aaabb")), + newToken(2, "t2", []byte(" ")), + newToken(1, "t1", []byte("babb")), + newToken(2, "t2", []byte(" ")), + newToken(1, "t1", []byte("bbabb")), + newToken(2, "t2", []byte(" ")), + newToken(1, "t1", []byte("abbbabb")), newEOFToken(), }, }, { - regexps: [][]byte{ - []byte("."), + lspec: &spec.LexSpec{ + Entries: []*spec.LexEntry{ + spec.NewLexEntry("t1", "."), + }, }, src: string([]byte{ 0x00, @@ -58,58 +63,52 @@ func TestLexer_Next(t *testing.T) { 0xf4, 0x8f, 0xbf, 0xbf, }), tokens: []*Token{ - newToken(1, []byte{0x00}), - newToken(1, []byte{0x7f}), - newToken(1, []byte{0xc2, 0x80}), - newToken(1, []byte{0xdf, 0xbf}), - newToken(1, []byte{0xe1, 0x80, 0x80}), - newToken(1, []byte{0xec, 0xbf, 0xbf}), - newToken(1, []byte{0xed, 0x80, 0x80}), - newToken(1, []byte{0xed, 0x9f, 0xbf}), - newToken(1, []byte{0xee, 0x80, 0x80}), - newToken(1, []byte{0xef, 0xbf, 0xbf}), - newToken(1, []byte{0xf0, 0x90, 0x80, 0x80}), - newToken(1, []byte{0xf0, 0xbf, 0xbf, 0xbf}), - newToken(1, []byte{0xf1, 0x80, 0x80, 0x80}), - newToken(1, []byte{0xf3, 0xbf, 0xbf, 0xbf}), - newToken(1, []byte{0xf4, 0x80, 0x80, 0x80}), - newToken(1, []byte{0xf4, 0x8f, 0xbf, 0xbf}), + newToken(1, "t1", []byte{0x00}), + newToken(1, "t1", []byte{0x7f}), + newToken(1, "t1", []byte{0xc2, 0x80}), + newToken(1, "t1", []byte{0xdf, 0xbf}), + newToken(1, "t1", []byte{0xe1, 0x80, 0x80}), + newToken(1, "t1", []byte{0xec, 0xbf, 0xbf}), + newToken(1, "t1", []byte{0xed, 0x80, 0x80}), + newToken(1, "t1", []byte{0xed, 0x9f, 0xbf}), + newToken(1, "t1", []byte{0xee, 0x80, 0x80}), + newToken(1, "t1", []byte{0xef, 0xbf, 0xbf}), + newToken(1, "t1", []byte{0xf0, 0x90, 0x80, 0x80}), + newToken(1, "t1", []byte{0xf0, 0xbf, 0xbf, 0xbf}), + newToken(1, "t1", []byte{0xf1, 0x80, 0x80, 0x80}), + newToken(1, "t1", []byte{0xf3, 0xbf, 0xbf, 0xbf}), + newToken(1, "t1", []byte{0xf4, 0x80, 0x80, 0x80}), + newToken(1, "t1", []byte{0xf4, 0x8f, 0xbf, 0xbf}), newEOFToken(), }, }, { - regexps: [][]byte{ - []byte("[ab.*|()[\\]]"), + lspec: &spec.LexSpec{ + Entries: []*spec.LexEntry{ + spec.NewLexEntry("t1", "[ab.*|()[\\]]"), + }, }, src: "ab.*|()[]", tokens: []*Token{ - newToken(1, []byte("a")), - newToken(1, []byte("b")), - newToken(1, []byte(".")), - newToken(1, []byte("*")), - newToken(1, []byte("|")), - newToken(1, []byte("(")), - newToken(1, []byte(")")), - newToken(1, []byte("[")), - newToken(1, []byte("]")), + newToken(1, "t1", []byte("a")), + newToken(1, "t1", []byte("b")), + newToken(1, "t1", []byte(".")), + newToken(1, "t1", []byte("*")), + newToken(1, "t1", []byte("|")), + newToken(1, "t1", []byte("(")), + newToken(1, "t1", []byte(")")), + newToken(1, "t1", []byte("[")), + newToken(1, "t1", []byte("]")), newEOFToken(), }, }, } for _, tt := range test { - res := map[int][]byte{} - for i, re := range tt.regexps { - res[i+1] = re - } - dfa, err := compiler.Compile(res) - if err != nil { - t.Fatalf("unexpected error occurred: %v", err) - } - tranTab, err := compiler.GenTransitionTable(dfa) + clspec, err := compiler.Compile(tt.lspec) if err != nil { t.Fatalf("unexpected error occurred: %v", err) } - lexer, err := NewLexer(tranTab, strings.NewReader(tt.src)) + lexer, err := NewLexer(clspec, strings.NewReader(tt.src)) if err != nil { t.Fatalf("unexpecated error occurred; %v", err) } @@ -129,18 +128,16 @@ func TestLexer_Next(t *testing.T) { } func TestLexer_PeekN(t *testing.T) { - dfa, err := compiler.Compile(map[int][]byte{ - 1: []byte("foo"), - 2: []byte("bar"), + clspec, err := compiler.Compile(&spec.LexSpec{ + Entries: []*spec.LexEntry{ + spec.NewLexEntry("", "foo"), + spec.NewLexEntry("", "bar"), + }, }) if err != nil { t.Fatalf("unexpected error occurred: %v", err) } - tranTab, err := compiler.GenTransitionTable(dfa) - if err != nil { - t.Fatalf("unexpected error occurred: %v", err) - } - lex, err := NewLexer(tranTab, strings.NewReader("foobar")) + lex, err := NewLexer(clspec, strings.NewReader("foobar")) if err != nil { t.Fatalf("unexpected error occurred: %v", err) } @@ -201,7 +198,7 @@ func TestLexer_PeekN(t *testing.T) { func testToken(t *testing.T, expected, actual *Token) { t.Helper() - if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid { + if actual.ID != expected.ID || actual.Kind != expected.Kind || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid { t.Errorf("unexpected token; want: %v (\"%v\"), got: %v (\"%v\")", expected, string(expected.Match), actual, string(actual.Match)) } } diff --git a/spec/spec.go b/spec/spec.go new file mode 100644 index 0000000..d827b68 --- /dev/null +++ b/spec/spec.go @@ -0,0 +1,28 @@ +package spec + +type LexEntry struct { + Kind string `json:"kind"` + Pattern string `json:"pattern"` +} + +func NewLexEntry(kind string, pattern string) *LexEntry { + return &LexEntry{ + Kind: kind, + Pattern: pattern, + } +} + +type LexSpec struct { + Entries []*LexEntry `json:"entries"` +} + +type TransitionTable struct { + InitialState int `json:"initial_state"` + AcceptingStates map[int]int `json:"accepting_states"` + Transition [][]int `json:"transition"` +} + +type CompiledLexSpec struct { + Kinds []string `json:"kinds"` + DFA *TransitionTable `json:"dfa"` +} |