author    Ryo Nihei <nihei.dev@gmail.com>    2021-02-14 01:16:29 +0900
committer Ryo Nihei <nihei.dev@gmail.com>    2021-02-14 01:16:29 +0900
commit    b451c9fb7ef92c7fdbbb4ca4afc3794d7080041c (patch)
tree      d991027b9a00fe1a610a1ca3192c64db35481035 /driver/lexer_test.go
parent    Add compiler (diff)
Add driver
The driver takes a DFA and an input text and generates a lexer. The lexer tokenizes the input text according to the lexical specification that the DFA expresses.
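For illustration, here is a minimal sketch of how a caller might drive the new lexer, following the API exercised by the tests in this commit (compiler.Compile, compiler.GenTransitionTable, NewLexer, Next, and the Token fields ID, Match, and EOF). The import path github.com/nihei9/maleeni/driver, the patterns, and the input string are assumptions made for the example, not part of the commit.

```go
package main

import (
	"fmt"
	"strings"

	"github.com/nihei9/maleeni/compiler" // import path taken from the test file below
	"github.com/nihei9/maleeni/driver"   // assumed path; the tests below live in package driver
)

func main() {
	// Map token IDs to the patterns they match; the compiler builds one
	// DFA covering the whole lexical specification.
	dfa, err := compiler.Compile(map[int][]byte{
		1: []byte("(a|b)*abb"),
		2: []byte(" *"),
	})
	if err != nil {
		panic(err)
	}
	// Turn the DFA into the transition table the driver consumes.
	tranTab, err := compiler.GenTransitionTable(dfa)
	if err != nil {
		panic(err)
	}
	lexer, err := driver.NewLexer(tranTab, strings.NewReader("abb aabb babb"))
	if err != nil {
		panic(err)
	}
	// Next yields one token per call until the EOF token appears.
	for {
		tok, err := lexer.Next()
		if err != nil {
			panic(err)
		}
		if tok.EOF {
			break
		}
		fmt.Printf("ID: %v, Text: %q\n", tok.ID, string(tok.Match))
	}
}
```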
Diffstat (limited to 'driver/lexer_test.go')
-rw-r--r-- | driver/lexer_test.go | 147 |
1 file changed, 147 insertions, 0 deletions
diff --git a/driver/lexer_test.go b/driver/lexer_test.go
new file mode 100644
index 0000000..4fd4bf8
--- /dev/null
+++ b/driver/lexer_test.go
@@ -0,0 +1,147 @@
+package driver
+
+import (
+	"bytes"
+	"strings"
+	"testing"
+
+	"github.com/nihei9/maleeni/compiler"
+)
+
+func TestLexer_Next(t *testing.T) {
+	test := []struct {
+		regexps [][]byte
+		src     string
+		tokens  []*Token
+	}{
+		{
+			regexps: [][]byte{
+				[]byte("(a|b)*abb"),
+				[]byte(" *"),
+			},
+			src: "abb aabb aaabb babb bbabb abbbabb",
+			tokens: []*Token{
+				newToken(1, []byte("abb")),
+				newToken(2, []byte(" ")),
+				newToken(1, []byte("aabb")),
+				newToken(2, []byte(" ")),
+				newToken(1, []byte("aaabb")),
+				newToken(2, []byte(" ")),
+				newToken(1, []byte("babb")),
+				newToken(2, []byte(" ")),
+				newToken(1, []byte("bbabb")),
+				newToken(2, []byte(" ")),
+				newToken(1, []byte("abbbabb")),
+				newEOFToken(),
+			},
+		},
+	}
+	for _, tt := range test {
+		res := map[int][]byte{}
+		for i, re := range tt.regexps {
+			res[i+1] = re
+		}
+		dfa, err := compiler.Compile(res)
+		if err != nil {
+			t.Fatalf("unexpected error occurred: %v", err)
+		}
+		tranTab, err := compiler.GenTransitionTable(dfa)
+		if err != nil {
+			t.Fatalf("unexpected error occurred: %v", err)
+		}
+		lexer, err := NewLexer(tranTab, strings.NewReader(tt.src))
+		if err != nil {
+			t.Fatalf("unexpected error occurred: %v", err)
+		}
+		for _, eTok := range tt.tokens {
+			tok, err := lexer.Next()
+			if err != nil {
+				t.Log(err)
+				break
+			}
+			testToken(t, eTok, tok)
+			t.Logf("token: ID: %v, Match: %+v, Text: \"%v\", EOF: %v, Invalid: %v", tok.ID, tok.Match, string(tok.Match), tok.EOF, tok.Invalid)
+			if tok.EOF {
+				break
+			}
+		}
+	}
+}
+
+func TestLexer_PeekN(t *testing.T) {
+	dfa, err := compiler.Compile(map[int][]byte{
+		1: []byte("foo"),
+		2: []byte("bar"),
+	})
+	if err != nil {
+		t.Fatalf("unexpected error occurred: %v", err)
+	}
+	tranTab, err := compiler.GenTransitionTable(dfa)
+	if err != nil {
+		t.Fatalf("unexpected error occurred: %v", err)
+	}
+	lex, err := NewLexer(tranTab, strings.NewReader("foobar"))
+	if err != nil {
+		t.Fatalf("unexpected error occurred: %v", err)
+	}
+
+	expectedTokens := []*Token{
+		{
+			ID:    1,
+			Match: []byte("foo"),
+		},
+		{
+			ID:    2,
+			Match: []byte("bar"),
+		},
+		{
+			EOF: true,
+		},
+	}
+
+	tok, err := lex.Peek1()
+	if err != nil {
+		t.Fatalf("unexpected error occurred: %v", err)
+	}
+	if tok == nil {
+		t.Fatalf("token is nil")
+	}
+	testToken(t, expectedTokens[0], tok)
+
+	tok, err = lex.Peek2()
+	if err != nil {
+		t.Fatalf("unexpected error occurred: %v", err)
+	}
+	if tok == nil {
+		t.Fatalf("token is nil")
+	}
+	testToken(t, expectedTokens[1], tok)
+
+	tok, err = lex.Peek3()
+	if err != nil {
+		t.Fatalf("unexpected error occurred: %v", err)
+	}
+	if tok == nil {
+		t.Fatalf("token is nil")
+	}
+	testToken(t, expectedTokens[2], tok)
+
+	for _, eTok := range expectedTokens {
+		tok, err = lex.Next()
+		if err != nil {
+			t.Fatalf("unexpected error occurred: %v", err)
+		}
+		if tok == nil {
+			t.Fatalf("token is nil")
+		}
+		testToken(t, eTok, tok)
+	}
+}
+
+func testToken(t *testing.T, expected, actual *Token) {
+	t.Helper()
+
+	if actual.ID != expected.ID || !bytes.Equal(actual.Match, expected.Match) || actual.EOF != expected.EOF || actual.Invalid != expected.Invalid {
+		t.Errorf("unexpected token; want: %v, got: %v", expected, actual)
+	}
+}
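TestLexer_PeekN checks that Peek1 through Peek3 return the upcoming tokens without consuming them: after all three peeks, Next still produces the full foo, bar, EOF sequence. Fixed lookahead of this kind is what a predictive parser needs in order to choose a production before committing to a token. A hypothetical fragment sketching that use (the parseFoo helper is made up; the token IDs follow the spec compiled in the test, and NewLexer is assumed to return a *driver.Lexer):

```go
// Sketch of one-token lookahead on top of the lexer.
func parseFoo(lex *driver.Lexer) error {
	tok, err := lex.Peek1() // inspect the next token without consuming it
	if err != nil {
		return err
	}
	if tok.ID != 1 { // 1 is the "foo" pattern in TestLexer_PeekN's spec
		return fmt.Errorf("want foo, got %q", string(tok.Match))
	}
	_, err = lex.Next() // now actually consume the peeked token
	return err
}
```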