diff options
author | Ryo Nihei <nihei.dev@gmail.com> | 2021-04-11 16:41:05 +0900 |
---|---|---|
committer | Ryo Nihei <nihei.dev@gmail.com> | 2021-04-11 16:41:05 +0900 |
commit | a13655e3936ba9166051914832dedcbdb28b056c (patch) | |
tree | c6fcd27c4769aecf16d4f8d2a43f1297f62b6caa /compiler/lexer_test.go | |
parent | Add logging to compile command (diff) | |
download | tre-a13655e3936ba9166051914832dedcbdb28b056c.tar.gz tre-a13655e3936ba9166051914832dedcbdb28b056c.tar.xz |
Fix grammar the parser accepts
* Add cases test the parse method.
* Fix the parser to pass the cases.
Diffstat (limited to 'compiler/lexer_test.go')
-rw-r--r-- | compiler/lexer_test.go | 73 |
1 files changed, 72 insertions, 1 deletions
diff --git a/compiler/lexer_test.go b/compiler/lexer_test.go index 0dfe2f7..451c3c7 100644 --- a/compiler/lexer_test.go +++ b/compiler/lexer_test.go @@ -31,7 +31,7 @@ func TestLexer(t *testing.T) { }, { caption: "lexer can recognize the special characters", - src: ".*+?|()[-][^^]", + src: ".*+?|()[a-z][^^]", tokens: []*token{ newToken(tokenKindAnyChar, nullChar), newToken(tokenKindRepeat, nullChar), @@ -41,7 +41,9 @@ func TestLexer(t *testing.T) { newToken(tokenKindGroupOpen, nullChar), newToken(tokenKindGroupClose, nullChar), newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), newToken(tokenKindBExpClose, nullChar), newToken(tokenKindInverseBExpOpen, nullChar), newToken(tokenKindChar, '^'), @@ -98,6 +100,75 @@ func TestLexer(t *testing.T) { }, }, { + caption: "hyphen symbols that appear in bracket expressions are handled as the character range symbol or ordinary characters", + // [...-...][...-][-...][-] + // ~~~~~~~ ~ ~ ~ + // ^ ^ ^ ^ + // | | | `-- Ordinary Character (b) + // | | `-- Ordinary Character (b) + // | `-- Ordinary Character (b) + // `-- Character Range (a) + // + // a. *-* is handled as a character range expression. + // b. *-, -*, or - are handled as ordinary characters. + src: "[a-z][a-][-z][-][--][---][^a-z][^a-][^-z][^-][^--][^---]", + tokens: []*token{ + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, 'a'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, 'z'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + newToken(tokenKindInverseBExpOpen, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindCharRange, nullChar), + newToken(tokenKindChar, '-'), + newToken(tokenKindBExpClose, nullChar), + + newToken(tokenKindEOF, nullChar), + }, + }, + { caption: "caret symbols that appear in bracket expressions are handled as the logical inverse symbol or ordinary characters", // [^...^...][^] // ~~ ~ ~~ |